diff --git a/hatchet/graphframe.py b/hatchet/graphframe.py index ee81e773..70727d6c 100644 --- a/hatchet/graphframe.py +++ b/hatchet/graphframe.py @@ -39,6 +39,8 @@ traceback.print_exc() raise +from typing import Dict, List + def parallel_apply(filter_function, dataframe, queue): """A function called in parallel, which does a pandas apply on part of a @@ -113,18 +115,44 @@ def from_hpctoolkit(dirname): @staticmethod def from_hpctoolkit_latest( dirname: str, + directory_mapping: Dict[str, str] = None, + parallel_profiles_mode: bool = False, max_depth: int = None, min_percentage_of_application_time: int = None, - min_percentage_of_parent_time: int = None, + exclude_mpi_function_details: bool = False, + exclude_openmp_function_details: bool = False, + exclude_cuda_function_details: bool = False, + exclude_system_libraries_source_code: bool = False, + exclude_function_call_lines: bool = False, + exclude_no_source_code_instructions: bool = False, + exclude_instructions: bool = False, + exclude_non_function_nodes: bool = False, + label_function_nodes: bool = True, + metric_names: List[str] = None, + metric_scopes: List[str] = None, + summary_metrics: List[str] = None, + profile_ranks: List[int] = None, ): """ Read an HPCToolkit database directory into a new GraphFrame Arguments: dirname (str): directory of an HPCToolkit performance database - max_depth (int): maximum depth that nodes in the CCT can have to be imported in Hatchet - min_percentage_of_application_time (int): minimum percentage of application time that nodes in the CCT must have to be imported in Hatchet - min_percentage_of_parent_time (int): minimum percentage of parent time that nodes in the CCT must have to be imported in Hatchet + directory_mapping (dict): Python dictionary that maps file system location to a name + parallel_profiles_mode (bool): flag whether the reader should extract parallel profiles from the database (true) or the summary profile (false) + max_depth (int): maximum depth that nodes in the tree should have + min_percentage_of_application_time (int): minimum percentage of the application time that nodes in the tree should have + exclude_mpi_function_details (bool): flag whether to exclude subtrees of MPI function nodes + exclude_openmp_function_details (bool): flag whether to exclude subtrees of OpenMP function nodes + exclude_cuda_function_details (bool): flag whether to exclude subtrees of CUDA function nodes + exclude_system_libraries_source_code (bool): flag whether the reader should exclude subtree of a system library node + exclude_function_call_lines (bool): flag whether to exclude source line nodes that represent place of a function call + exclude_no_source_code_instructions (bool): flag whether to exclude nodes with no source code mapping information + label_function_nodes (bool): flag whether to label function nodes with 'function' prefix + metric_names (list): list of metrics to extract for selected profiles + metric_scopes (list): list of metric scopes to extract for selected profiles + summary_metrics (list): list of summary metrics to extract from the summary profile + profile_ranks (list): list of MPI ranks that specify which parallel profiles to extract Returns: (GraphFrame): new GraphFrame containing HPCToolkit profile data @@ -134,9 +162,23 @@ def from_hpctoolkit_latest( return HPCToolkitReaderLatest( dirname, + directory_mapping=directory_mapping, + parallel_profiles_mode=parallel_profiles_mode, max_depth=max_depth, min_application_percentage_time=min_percentage_of_application_time, - min_parent_percentage_time=min_percentage_of_parent_time, + exclude_mpi_function_details=exclude_mpi_function_details, + exclude_openmp_function_details=exclude_openmp_function_details, + exclude_cuda_function_details=exclude_cuda_function_details, + exclude_system_libraries_source_code=exclude_system_libraries_source_code, + exclude_function_call_lines=exclude_function_call_lines, + exclude_no_source_code_instructions=exclude_no_source_code_instructions, + exclude_instructions=exclude_instructions, + exclude_non_function_nodes=exclude_non_function_nodes, + label_function_nodes=label_function_nodes, + metric_names=metric_names, + metric_scopes=metric_scopes, + summary_metrics=summary_metrics, + profile_ranks=profile_ranks, ).read() @staticmethod diff --git a/hatchet/readers/hpctoolkit_reader_latest.py b/hatchet/readers/hpctoolkit_reader_latest.py index 237d47dd..968c3db6 100644 --- a/hatchet/readers/hpctoolkit_reader_latest.py +++ b/hatchet/readers/hpctoolkit_reader_latest.py @@ -6,7 +6,8 @@ import os import re import struct -from typing import Dict, Union +from datetime import datetime +from typing import Dict, List, Tuple, Union import pandas as pd @@ -16,6 +17,28 @@ from hatchet.node import Node +def binary_search( + format: str, data: bytes, low: int, high: int, target: int +) -> Union[int, Tuple[int, int, Union[int, float]]]: + + if high >= low: + + mid = (low + high) // 2 + (id, idValue) = safe_unpack(format, data, 0, mid) + + if id == target: + return (mid, id, idValue) + + elif id > target: + return binary_search(format, data, low, mid - 1, target) + + else: + return binary_search(format, data, mid + 1, high, target) + + else: + return -1 + + def safe_unpack( format: str, data: bytes, offset: int, index: int = None, index_length: int = None ) -> tuple: @@ -36,6 +59,8 @@ def read_string(data: bytes, offset: int) -> str: offset += 1 +NODE_TYPE_MAPPING = {0: "function", 1: "loop", 2: "line", 3: "instruction"} + METRIC_SCOPE_MAPPING = { "execution": "i", "function": "e", @@ -43,7 +68,11 @@ def read_string(data: bytes, offset: int) -> str: "lex_aware": "c", } -NODE_TYPE_MAPPING = {0: "function", 1: "loop", 2: "line", 3: "instruction"} +SUMMARY_METRIC_MAPPING = { + 0: "sum", + 1: "min", + 2: "max", +} FILE_HEADER_OFFSET = 16 @@ -53,30 +82,64 @@ class HPCToolkitReaderLatest: def __init__( self, dir_path: str, + directory_mapping: Dict[str, str] = None, + parallel_profiles_mode: bool = False, max_depth: int = None, min_application_percentage_time: int = None, - min_parent_percentage_time: int = None, + exclude_mpi_function_details: bool = False, + exclude_openmp_function_details: bool = False, + exclude_cuda_function_details: bool = False, + exclude_system_libraries_source_code: bool = False, + exclude_function_call_lines: bool = False, + exclude_no_source_code_instructions: bool = False, + exclude_instructions: bool = False, + exclude_non_function_nodes: bool = False, + label_function_nodes: bool = True, + metric_names: List[str] = ["time"], + metric_scopes: List[str] = ["i", "e"], + summary_metrics: List[str] = ["sum", "min", "max"], + profile_ranks: List[int] = None, ) -> None: self._dir_path = dir_path + self._directory_mapping = directory_mapping or {} + self._parallel_profiles_mode = parallel_profiles_mode self._max_depth = max_depth - self._application_percentage = min_application_percentage_time - self._parent_percentage = min_parent_percentage_time + self._min_application_percentage_time = min_application_percentage_time + + self._exclude_mpi_function_details = exclude_mpi_function_details + self._exclude_openmp_function_details = exclude_openmp_function_details + self._exclude_cuda_function_details = exclude_cuda_function_details + self._exclude_system_libraries_source_code = ( + exclude_system_libraries_source_code + ) + self._exclude_function_call_lines = exclude_function_call_lines + self._exclude_no_source_code_instructions = exclude_no_source_code_instructions + self._exclude_instructions = exclude_instructions + self._exclude_non_function_nodes = exclude_non_function_nodes + + self._metric_names = metric_names or ["time"] + self._metric_scopes = metric_scopes or ["i", "e"] + self._summary_metrics = summary_metrics or ["sum", "min", "max"] + self._profile_ranks = profile_ranks or [] self._meta_file = None self._profile_file = None + self._cct_file = None self._functions = {} self._source_files = {} self._load_modules = {} - self._metric_descriptions = {} + self._graph_roots = [] + self._profiles_data = [] self._summary_profile = {} + self._label_function_nodes = label_function_nodes + self._profile_ids = [] + self._metric_ids = [] + self._metric_descriptions = {} self._time_metric = None - self._inclusive_metrics = {} - self._exclusive_metrics = {} - - self._cct_roots = [] - self._metrics_table = [] + self._total_execution_time = 0 + self._profiles_metadata: pd.DataFrame = None for file_path in os.listdir(self._dir_path): if file_path.split(".")[-1] == "db": @@ -90,6 +153,9 @@ def __init__( self._meta_file = file_path elif format == "prof": self._profile_file = file_path + elif format == "ctxt": + self._cct_file = file_path + except Exception: pass @@ -99,52 +165,8 @@ def __init__( if self._profile_file is None: raise ValueError("ERROR: profile.db not found.") - def _read_metric_descriptions(self) -> None: - with open(self._meta_file, "rb") as file: - file.seek(FILE_HEADER_OFFSET + 4 * 8) - formatMetrics = " Dict[str, str]: if pFile not in self._source_files: @@ -153,10 +175,25 @@ def _parse_source_file(self, meta_db: bytes, pFile: int) -> Dict[str, str]: meta_db, pFile + struct.calcsize(" Node: - node = Node(Frame(frame), parent=parent, hnid=ctxId, depth=depth) - if parent is None: - self._cct_roots.append(node) - else: - parent.add_child(node) - node_value = { - "node": node, - "name": ( - # f"{frame['type']}: {frame['name']}" - frame["name"] - if frame["name"] != 1 - else "entry" - ), - } - - if ctxId in self._summary_profile: - node_value.update(self._summary_profile[ctxId]) - - self._metrics_table.append(node_value) - - return node - def _parse_context( self, current_offset: int, total_size: int, parent: Node, meta_db: bytes, - parent_time: int, ) -> None: - final_offset = current_offset + total_size while current_offset < final_offset: (szChildren, pChildren, ctxId, _, lexicalType, nFlexWords) = safe_unpack( " {frame['name']}" + + if self._exclude_cuda_function_details: + for item in [ + "libcuda.so", + "libcudart.so", + "libcusparse.so", + "libcublas.so", + "libcurand.so", + "libcusolver.so", + "libcufft.so", + "/cuda/", + ]: + if item in file_data["file_path"]: + include_subtree = False + + if self._label_function_nodes: + frame["name"] = f"function: {frame['name']}" + + elif node_type == "instruction": (pModule, offset) = safe_unpack(" 0 + ): + include_node = False + + if not include_node: + node = parent + + else: + node = self._store_cct_node( + ctxId, + frame, + parent, + ) - if self._max_depth is None or node._depth < self._max_depth: + if include_subtree and ( + self._max_depth is None or node._depth < self._max_depth + ): self._parse_context( pChildren, szChildren, node, meta_db, - my_time, ) - def _read_summary_profile( + def _store_cct_node( self, - ) -> None: + ctxId: int, + frame: dict, + parent: Node = None, + ) -> Node: + node = Node( + Frame(frame), + parent=parent, + hnid=ctxId, + depth=0 if parent is None else parent._depth + 1, + ) - with open(self._profile_file, "rb") as file: - file.seek(FILE_HEADER_OFFSET) - formatProfileInfos = " None: + with open(self._meta_file, "rb") as file: + file.seek(FILE_HEADER_OFFSET + 4 * 8) + formatMetrics = " None: + (pScopeName,) = safe_unpack(" None: + identifiers = {} + + with open(self._meta_file, "rb") as file: + file.seek(FILE_HEADER_OFFSET + 2 * 8) + formatIdNames = " None: - for em in (list(self._exclusive_metrics.values()),): - if em in table.columns.tolist(): - exclusive_metrics.append(em) + with open(self._profile_file, "rb") as file: + file.seek(FILE_HEADER_OFFSET) + formatProfileInfos = " GraphFrame: self._read_metric_descriptions() - self._read_summary_profile() + if ( + not self._parallel_profiles_mode + or self._min_application_percentage_time is not None + ): + self._read_summary_profile() + + if self._parallel_profiles_mode: + self._read_profiles_metadata() + return self._read_cct() diff --git a/hatchet/tests/hpctoolkit_latest.py b/hatchet/tests/hpctoolkit_latest.py index 75f74eb7..3eaaa28d 100644 --- a/hatchet/tests/hpctoolkit_latest.py +++ b/hatchet/tests/hpctoolkit_latest.py @@ -8,7 +8,11 @@ def test_import_entire_db(data_dir: str) -> None: - graphframe = GraphFrame.from_hpctoolkit_latest(f"{data_dir}/hpctoolkit-gamess") + graphframe = GraphFrame.from_hpctoolkit_latest( + f"{data_dir}/hpctoolkit-gamess", + metric_names=["time", "gpuop", "gker", "gxcopy", "gxcopy:count"], + label_function_nodes=False, + ) assert len(graphframe.graph.roots) == 1 assert graphframe.graph.roots[0]._hatchet_nid == 1195 @@ -16,7 +20,7 @@ def test_import_entire_db(data_dir: str) -> None: assert graphframe.graph.roots[0].frame["name"] == "entry" assert graphframe.graph.roots[0].frame["type"] == "entry" - assert len(graphframe.dataframe) == 10824 + assert len(graphframe.dataframe) == 11292 assert "name" in graphframe.dataframe.columns assert "time (inc)" in graphframe.dataframe.columns assert "time" in graphframe.dataframe.columns @@ -42,19 +46,22 @@ def test_import_entire_db(data_dir: str) -> None: assert measurements["gxcopy:count (inc)"] == 9688 measurements = graphframe.dataframe.loc[Node(None, hnid=1004)] - assert measurements["name"] == "[libsci_cray.so.5.0]:0" + assert measurements["name"] == "loop [libsci_cray.so.5.0]:0" assert round(measurements["time (inc)"], 2) == 0.08 assert round(measurements["time"], 2) == 0.08 measurements = graphframe.dataframe.loc[Node(None, hnid=1003)] - assert measurements["name"] == "[libsci_cray.so.5.0]:0" + assert measurements["name"] == "line [libsci_cray.so.5.0]:0" assert round(measurements["time (inc)"], 2) == 0.08 assert round(measurements["time"], 2) == 0.08 def test_filter_by_max_depth(data_dir: str) -> None: graphframe = GraphFrame.from_hpctoolkit_latest( - f"{data_dir}/hpctoolkit-gamess", max_depth=10 + f"{data_dir}/hpctoolkit-gamess", + max_depth=10, + metric_names=["time", "gpuop", "gker", "gxcopy", "gxcopy:count"], + label_function_nodes=False, ) assert len(graphframe.graph.roots) == 1 @@ -97,7 +104,7 @@ def test_filter_by_max_depth(data_dir: str) -> None: assert measurements["gxcopy:count (inc)"] == 9688 measurements = graphframe.dataframe.loc[Node(None, hnid=9845)] - assert measurements["name"] == "[gamess.00.x]:0" + assert measurements["name"] == "loop [gamess.00.x]:0" assert round(measurements["time (inc)"], 2) == 786.09 assert round(measurements["gpuop (inc)"], 2) == 608.09 assert round(measurements["gker (inc)"], 2) == 608.00 @@ -110,7 +117,10 @@ def test_filter_by_max_depth(data_dir: str) -> None: def test_filter_by_min_percentage_of_application_time(data_dir: str) -> None: graphframe = GraphFrame.from_hpctoolkit_latest( - f"{data_dir}/hpctoolkit-gamess", min_percentage_of_application_time=1 + f"{data_dir}/hpctoolkit-gamess", + min_percentage_of_application_time=1, + metric_names=["time", "gpuop", "gker", "gxcopy", "gxcopy:count"], + label_function_nodes=False, ) assert len(graphframe.graph.roots) == 1 @@ -151,63 +161,10 @@ def test_filter_by_min_percentage_of_application_time(data_dir: str) -> None: assert round(measurements["time"], 3) == 159.238 measurements = graphframe.dataframe.loc[Node(None, hnid=251)] - assert measurements["name"] == "[libc-2.31.so]:0" + assert measurements["name"] == "line [libc-2.31.so]:0" assert round(measurements["time (inc)"], 3) == 159.238 assert round(measurements["time"], 3) == 159.238 for node in graphframe.graph.traverse(): node_time = graphframe.dataframe.loc[node]["time (inc)"] assert node_time / application_time >= 0.01 - - -def test_filter_by_min_percentage_of_parent_time(data_dir: str) -> None: - graphframe = GraphFrame.from_hpctoolkit_latest( - f"{data_dir}/hpctoolkit-gamess", min_percentage_of_parent_time=1 - ) - - assert len(graphframe.graph.roots) == 1 - assert graphframe.graph.roots[0]._hatchet_nid == 1195 - assert graphframe.graph.roots[0]._depth == 0 - assert graphframe.graph.roots[0].frame["name"] == "entry" - assert graphframe.graph.roots[0].frame["type"] == "entry" - - assert len(graphframe.dataframe) == 4576 - assert "name" in graphframe.dataframe.columns - assert "time (inc)" in graphframe.dataframe.columns - assert "time" in graphframe.dataframe.columns - assert "gpuop (inc)" in graphframe.dataframe.columns - assert "gker (inc)" in graphframe.dataframe.columns - assert "gxcopy (inc)" in graphframe.dataframe.columns - assert "gxcopy:count (inc)" in graphframe.dataframe.columns - - measurements = graphframe.dataframe.loc[Node(None, hnid=1195)] - assert measurements["name"] == "entry" - assert round(measurements["time (inc)"], 2) == 1608.49 - assert round(measurements["gpuop (inc)"], 2) == 608.09 - assert round(measurements["gker (inc)"], 2) == 608.00 - assert round(measurements["gxcopy (inc)"], 2) == 0.09 - assert measurements["gxcopy:count (inc)"] == 9688 - - measurements = graphframe.dataframe.loc[Node(None, hnid=1197)] - assert measurements["name"] == "gamess_" - assert round(measurements["time (inc)"], 2) == 1608.49 - assert round(measurements["gpuop (inc)"], 2) == 608.09 - assert round(measurements["gker (inc)"], 2) == 608.00 - assert round(measurements["gxcopy (inc)"], 2) == 0.09 - assert measurements["gxcopy:count (inc)"] == 9688 - - measurements = graphframe.dataframe.loc[Node(None, hnid=2856)] - assert measurements["name"] == "__GI___sched_yield" - assert round(measurements["time (inc)"], 3) == 159.238 - assert round(measurements["time"], 3) == 159.238 - - measurements = graphframe.dataframe.loc[Node(None, hnid=251)] - assert measurements["name"] == "[libc-2.31.so]:0" - assert round(measurements["time (inc)"], 3) == 159.238 - assert round(measurements["time"], 3) == 159.238 - - for node in graphframe.graph.traverse(): - node_time = graphframe.dataframe.loc[node]["time (inc)"] - if node.frame["type"] != "entry": - parent_time = graphframe.dataframe.loc[node.parents[0]]["time (inc)"] - assert node_time / parent_time >= 0.01