diff --git a/tools/profiling/python/h5toctf.py b/tools/profiling/python/h5toctf.py
new file mode 100644
index 000000000..ea2adaf1e
--- /dev/null
+++ b/tools/profiling/python/h5toctf.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""Convert a PaRSEC HDF5 profile into a Chrome/Perfetto trace-event JSON file."""
+
+# Standard-library imports are unguarded and come first so that `sys` is
+# always defined when the import-error handlers below print sys.path.
+import argparse
+import json
+import math
+import os
+import re
+import sys
+import time
+
+try:
+    import numpy as np
+    import pandas
+except ModuleNotFoundError:
+    print("Did not find a system module, use pip to install it")
+
+try:
+    import parsec_trace_tables as ptt
+    import pbt2ptt
+except ModuleNotFoundError:
+    print("Did not find pbt2ptt, you are likely using python version that does not match the version used to build PaRSEC profiling tools")
+    print(sys.path)
+
+
+def bool(str):
+    """Interpret a flag value as a boolean.
+
+    Strings are true when they spell, in any letter case, "true", "yes", "y",
+    "1" or "t". Values without a lower() method (for instance the real
+    booleans h5_to_ctf uses as defaults) are judged by truthiness.
+
+    NOTE: the function and parameter names shadow builtins; they are kept
+    as-is so that existing callers of this module are not broken.
+    """
+    try:
+        return str.lower() in ["true", "yes", "y", "1", "t"]
+    except AttributeError:
+        # Not a string: calling lower() on True/False used to crash here.
+        return True if str else False
+
+
+def h5_to_ctf(ptt_filename, ctf_filename, **kwargs):
+    """Convert a PaRSEC HDF5 trace into a trace-event JSON file.
+
+    Args:
+        ptt_filename: path of the HDF5 profile, loaded with ptt.from_hdf.
+        ctf_filename: path of the JSON file to write.
+        **kwargs: optional flags, as booleans or as strings understood by
+            bool() above; any other key is ignored.
+            skip_parsec_events (default True): drop events whose type name
+                starts with "PARSEC".
+            skip_mpi_events (default True): drop events whose type name
+                starts with "MPI".
+            key_is_part_of_task_name (default False): name events
+                "type<key>" instead of just "type".
+    """
+    print(f"Converting {ptt_filename} into {ctf_filename}")
+
+    skip_parsec_events = bool(kwargs.get('skip_parsec_events', True))
+    skip_mpi_events = bool(kwargs.get('skip_mpi_events', True))
+    key_is_part_of_task_name = bool(kwargs.get('key_is_part_of_task_name', False))
+
+    ctf_data = {"traceEvents": []}
+    trace_events = ctf_data["traceEvents"]
+
+    print(f" Reading {ptt_filename}...", end='', flush=True)
+    trace = ptt.from_hdf(ptt_filename)
+    print(f" Done")
+
+    print(f" Building dictionary of events...", end='', flush=True)
+    # Metadata events naming and ordering every stream (thread).
+    for _, t in trace.streams.iterrows():
+        tokens = t.description.split()
+        # Shorten "PaRSEC Thread <n> of VP <v> Bound on ..." to "PaRSEC Thread <n>".
+        # The length test keeps shorter descriptions from raising IndexError.
+        if (len(tokens) >= 8
+                and tokens[0] == 'PaRSEC' and tokens[1] == 'Thread'
+                and tokens[3] == 'of' and tokens[4] == 'VP'
+                and tokens[6] == 'Bound' and tokens[7] == 'on'):
+            thread_name = f"PaRSEC Thread {tokens[2]}"
+        else:
+            thread_name = t.description
+        trace_events.append({'name': 'thread_name', 'ph': 'M',
+                             'pid': t.node_id, 'tid': t.stream_id,
+                             'args': {'name': thread_name}})
+        trace_events.append({'name': 'thread_sort_index', 'ph': 'M',
+                             'pid': t.node_id, 'tid': t.stream_id,
+                             'args': {'sort_index': t.th_id}})
+
+    # Metadata events naming and ordering every node (process).
+    for nid, n in trace.nodes.iterrows():
+        trace_events.append({'name': 'process_name', 'ph': 'M', 'pid': nid,
+                             'args': {'name': n.hostname}})
+        trace_events.append({'name': 'process_sort_index', 'ph': 'M', 'pid': nid,
+                             'args': {'sort_index': nid}})
+
+    for e in trace.events.itertuples():
+        category = trace.event_names[e.type]
+        if skip_parsec_events and category.startswith("PARSEC"):
+            continue
+        if skip_mpi_events and category.startswith("MPI"):
+            continue
+
+        ctf_event = {}
+        ctf_event["ph"] = "X"  # complete event type
+        # The trace-event format counts time in microseconds; the 0.001 factor
+        # presumably converts nanosecond timestamps -- TODO confirm the unit.
+        ctf_event["ts"] = 0.001 * e.begin  # when the event started
+        ctf_event["dur"] = 0.001 * (e.end - e.begin)  # how long it lasted
+        ctf_event["cat"] = category
+
+        name = category
+        if e.key is not None:
+            key = e.key.decode('utf-8').rstrip('\x00')
+            ctf_event["args"] = {"key": key}
+            if key_is_part_of_task_name:
+                name = category + "<" + key + ">"
+        ctf_event["name"] = name
+
+        ctf_event["pid"] = e.node_id
+        tid = e.stream_id
+        # Events without a stream id (NaN) are grouped under a placeholder tid.
+        ctf_event["tid"] = 111111 if math.isnan(tid) else int(tid)
+
+        trace_events.append(ctf_event)
+    print(f" Done")
+
+    print(f" Generating {ctf_filename}...", end='', flush=True)
+    with open(ctf_filename, "w") as chrome_trace:
+        json.dump(ctf_data, chrome_trace)
+    print(f" Done")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog='h5toctf',
+                                     description='Convert a HDF5 PaRSEC profile file into a Perfetto profiling in JSON format')
+    parser.add_argument('ptt_file_name', nargs=1, action='store', help='PaRSEC HDF5 profile file')
+    parser.add_argument('ctf_file_name', nargs=1, action='store', help='Name of the generated JSON file')
+    # The flags store the strings 'True'/'False'; h5_to_ctf decodes them with bool().
+    parser.add_argument('--show-parsec-events', action='store_const', const='False', default='True', dest='skip_parsec_events', help='Include internal PaRSEC events in the trace')
+    parser.add_argument('--show-mpi-events', action='store_const', const='False', default='True', dest='skip_mpi_events', help='Include internal MPI events (generated by the PaRSEC internal comm engine) in the trace')
+    parser.add_argument('--key-is-part-of-task-name', action='store_const', const='True', default='False', dest='key_is_part_of_task_name', help='Include the key in the task name')
+    args = parser.parse_args()
+
+    h5_to_ctf(args.ptt_file_name[0], args.ctf_file_name[0], **vars(args))
diff --git a/tools/profiling/python/pbt2ctf.py b/tools/profiling/python/pbt2ctf.py
new file mode 100644
index 000000000..307b9fe91
--- /dev/null
+++ b/tools/profiling/python/pbt2ctf.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""Convert a set of PaRSEC binary profile files into a trace-event JSON file."""
+
+import h5toctf
+import sys
+import os
+import argparse
+
+try:
+    import pbt2ptt
+except ModuleNotFoundError:
+    print("Did not find pbt2ptt, you are likely using python version that does not match the version used to build PaRSEC profiling tools")
+    print(sys.path)
+
+
+def bool(str):
+    """Interpret a flag value as a boolean.
+
+    Strings are true when they spell, in any letter case, "true", "yes", "y",
+    "1" or "t". Values without a lower() method (real booleans, None, ...)
+    are judged by truthiness instead of raising AttributeError.
+
+    NOTE: the function and parameter names shadow builtins; they are kept
+    as-is so that existing callers of this module are not broken.
+    """
+    try:
+        return str.lower() in ["true", "yes", "y", "1", "t"]
+    except AttributeError:
+        return True if str else False
+
+
+def pbt_to_ctf(pbt_files_list, ctf_filename, **kwargs):
+    """Convert PaRSEC binary profiles to HDF5, then to a trace-event JSON file.
+
+    Args:
+        pbt_files_list: the binary profile file names handed to pbt2ptt.convert.
+        ctf_filename: path of the JSON file to write.
+        **kwargs: forwarded unchanged to h5toctf.h5_to_ctf.
+    """
+    print(f"Converting {pbt_files_list} into a HDF5 File")
+    ptt_filename = pbt2ptt.convert(pbt_files_list, multiprocess=False)
+    h5toctf.h5_to_ctf(ptt_filename, ctf_filename, **kwargs)
+
+
+if __name__ == "__main__":
+    # 'prog' (not 'prof') is the ArgumentParser keyword for the program name;
+    # the misspelling made argparse raise TypeError before parsing anything.
+    parser = argparse.ArgumentParser(prog='pbt2ctf',
+                                     description='Convert a set of PaRSEC binary profile files into a Perfetto profiling in JSON format')
+    parser.add_argument('pbt_file_prefix', nargs=1, action='store', help='Prefix of the PaRSEC Binary Profile files')
+    parser.add_argument('ctf_file_name', nargs=1, action='store', help='Name of the generated JSON file')
+    parser.add_argument('--show-parsec-events', action='store_const', const='False', default='True', dest='skip_parsec_events', help='Include internal PaRSEC events in the trace')
+    parser.add_argument('--show-mpi-events', action='store_const', const='False', default='True', dest='skip_mpi_events', help='Include internal MPI events (generated by the PaRSEC internal comm engine) in the trace')
+    parser.add_argument('--key-is-part-of-task-name', action='store_const', const='True', default='False', dest='key_is_part_of_task_name', help='Include the key in the task name')
+    args = parser.parse_args()
+
+    # Collect every .prof file in the prefix's directory whose path starts with the prefix.
+    prefix = args.pbt_file_prefix[0]
+    dirname = os.path.dirname(prefix)
+    pbt_files_list = []
+    # A bare prefix has an empty dirname, which os.listdir() rejects: scan "." instead.
+    for file in os.listdir(dirname or os.curdir):
+        file_fullname = os.path.join(dirname, file)
+        if file_fullname.startswith(prefix) and file_fullname.endswith(".prof") and file_fullname != args.ctf_file_name[0]:
+            print("found file ", file_fullname)
+            pbt_files_list.append(file_fullname)
+
+    if not pbt_files_list:
+        sys.exit(f"No .prof file found with prefix {prefix}")
+
+    # to debug: read_pbt(pbt_files_list[0]), etc.
+    # pbt_to_ctf takes the whole list of files, not only its first entry.
+    pbt_to_ctf(pbt_files_list, args.ctf_file_name[0], **vars(args))