Draft

54 commits
50afc48
perf_reader initial commit
Jul 17, 2023
3f71b2e
graph updated
Jul 24, 2023
4f5223e
add front-level reader API, add PFA reader example
slabasan Jul 19, 2023
7a6dcc2
graph updated
r-yin Jul 24, 2023
0d306b5
added pfw testfile with one iteration of laghos output
r-yin Jul 25, 2023
5e93a00
pfw file processor added
r-yin Aug 8, 2023
bb9c9c9
pfw to literal converter
r-yin Aug 22, 2023
587a17a
sample json
r-yin Aug 22, 2023
105c4c9
pfw folder processor
r-yin Aug 29, 2023
99cf379
finalized pfw to caliper reader
r-yin Sep 9, 2023
37d1b05
cleaning up code
slabasan Apr 11, 2024
65f6aa3
update example for pfa reader
slabasan Apr 11, 2024
d0cd0dc
add perfflowaspect unit test
slabasan Apr 11, 2024
078d83d
add AMS MPI pfa unit test (concat pfw files per rank), flake, black
slabasan Apr 15, 2024
dea17c0
restore kwargs in from_hdf
slabasan Apr 15, 2024
1b13b2e
update comment
slabasan Apr 15, 2024
8f73af7
Add perfflowaspect object reader and update related tests and scripts
Jul 9, 2024
6c94f8f
updated the test file. add extra fields for the time unit and matedat…
Jul 9, 2024
73414e2
perf_reader initial commit
Jul 17, 2023
41a1281
graph updated
Jul 24, 2023
4659c1f
add front-level reader API, add PFA reader example
slabasan Jul 19, 2023
d8d8faf
graph updated
r-yin Jul 24, 2023
ff65cd0
added pfw testfile with one iteration of laghos output
r-yin Jul 25, 2023
48cf77b
pfw file processor added
r-yin Aug 8, 2023
700eba0
pfw to literal converter
r-yin Aug 22, 2023
57a471c
sample json
r-yin Aug 22, 2023
cf6fdb8
pfw folder processor
r-yin Aug 29, 2023
9e67d6f
finalized pfw to caliper reader
r-yin Sep 9, 2023
68065f6
cleaning up code
slabasan Apr 11, 2024
4ed1e7e
update example for pfa reader
slabasan Apr 11, 2024
40ae99f
add perfflowaspect unit test
slabasan Apr 11, 2024
0e2449c
add AMS MPI pfa unit test (concat pfw files per rank), flake, black
slabasan Apr 15, 2024
b521c0a
restore kwargs in from_hdf
slabasan Apr 15, 2024
04a387e
update comment
slabasan Apr 15, 2024
cf54080
read cpu/mem usage
spencer-gre Jul 11, 2024
c8c4aa2
updates to pfa reader, prepare tests
spencer-gre Jul 16, 2024
449b620
forgot this test file
spencer-gre Jul 16, 2024
8b5cf61
tests for cpu-mem
spencer-gre Jul 17, 2024
6afb989
fix tests
spencer-gre Jul 17, 2024
b07fbf3
Read All PFW Files
spencer-gre Jul 17, 2024
b1461b5
Cleanup/Prep for Object Reader
spencer-gre Jul 25, 2024
faf48b2
Removing PFW JSON Files
spencer-gre Jul 25, 2024
6a90b58
added us to s converter. fixed object reader tests
loudsun1997 Jul 29, 2024
84cf2f7
Merge branch 'pfa-reader-pr' into pfa-hatchet-pr-conflict-resolved
loudsun1997 Jul 29, 2024
f5723ba
Cleanup after merge
spencer-gre Jul 29, 2024
636086b
added tests. validated things I wroked on are properly merged and wor…
loudsun1997 Jul 30, 2024
b11f3dd
Merge branch 'pfa-hatchet-pr-conflict-resolved' of https://github.com…
loudsun1997 Jul 30, 2024
8f09fd0
Merge pull request #1 from spencer-gre/pfa-hatchet-pr-conflict-resolved
loudsun1997 Jul 30, 2024
a1e5771
flake8
spencer-gre Jul 30, 2024
46f535d
Merge remote-tracking branch 'upstream/develop' into pfa-hatchet-pr
spencer-gre Nov 16, 2025
164a215
code format
spencer-gre Nov 16, 2025
27fe279
pushed
spencer-gre Nov 24, 2025
1e55e77
nearly working single pfa reader
spencer-gre Dec 4, 2025
e99b000
wrapping up pfa reader
spencer-gre Dec 4, 2025
31 changes: 31 additions & 0 deletions docs/examples/read/perfflowaspect.py
@@ -0,0 +1,31 @@
#!/usr/bin/env python
#
# Copyright 2017-2023 Lawrence Livermore National Security, LLC and other
# Hatchet Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

import hatchet as ht


if __name__ == "__main__":
    # pfa_file = "../../../hatchet/tests/data/perfflowaspect-smoketests/array_compact.pfw"
    # pfa_file = "../../../hatchet/tests/data/perfflowaspect-smoketests/array_verbose.pfw"
    # pfa_file = "../../../hatchet/tests/data/perfflowaspect-smoketests/object_compact_adiak.pfw"
    pfa_file = "../../../hatchet/tests/data/perfflowaspect-smoketests/object_verbose_adiak.pfw"

    gf = ht.GraphFrame.from_perfflowaspect(pfa_file)

    # Print the DataFrame component of the GraphFrame.
    print(gf.dataframe)
    print(gf.metadata)

    print(len(gf.graph.roots))

    for i, node in enumerate(gf.graph.traverse()):
        print(node._hatchet_nid, node, list(node.parents), list(node.children))

    # Print the graph component of the GraphFrame.
    # Use "dur" as the metric column to be displayed.
    print(gf.tree(metric_column=["dur"]))
    # print(gf.tree(metric_column="ts"))
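For reference, the four smoke-test inputs above differ along two axes: array format (a bare JSON list of events) versus object format (a dict with "traceEvents" plus metadata), and verbose (paired "B"/"E" events) versus compact ("X" events with a "dur" field). A minimal sketch of the two container shapes, with hypothetical event values:

```python
# Array format (array_*.pfw): a bare list of Chrome-trace-style events.
array_trace = [
    {"name": "foo", "ts": 100, "dur": 150, "pid": 1, "tid": 1, "ph": "X"},
]

# Object format (object_*.pfw): events wrapped with metadata. The reader
# exposes "otherData" as gf.metadata and records "displayTimeUnit".
object_trace = {
    "traceEvents": array_trace,
    "displayTimeUnit": "us",
    "otherData": {"adiak.user": "someone"},  # hypothetical Adiak field
}
```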
6 changes: 6 additions & 0 deletions hatchet/graphframe.py
@@ -395,6 +395,12 @@ def from_hdf(filename, **kwargs):
             )
             raise ve
 
+    @staticmethod
+    def from_perfflowaspect(filename, scan_memory=False, scan_cpu=False):
+        from .readers.perfflowaspect_reader import PerfFlowAspectReader
+
+        return PerfFlowAspectReader(filename, scan_memory=scan_memory, scan_cpu=scan_cpu).read()
+
     @deprecated(
         "Reading from/writing to HDF5 is deprecated and will be removed in a later version."
     )
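With this entry point in place, loading a PerfFlowAspect trace mirrors the other readers. A minimal usage sketch, assuming a hypothetical local trace file `trace.pfw` that contains counter ("C") events:

```python
import hatchet as ht

# scan_memory/scan_cpu only succeed if the trace contains counter ("C")
# events carrying memory_usage/cpu_usage args; otherwise the reader raises.
gf = ht.GraphFrame.from_perfflowaspect(
    "trace.pfw", scan_memory=True, scan_cpu=True
)

# Per-node timing and usage columns land in the DataFrame.
print(gf.dataframe[["name", "dur", "usage_memory", "usage_cpu"]])
```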
25 changes: 12 additions & 13 deletions hatchet/readers/hpctoolkit_reader_latest.py
@@ -49,7 +49,6 @@ def read_string(data: bytes, offset: int) -> str:
 
 
 class HPCToolkitReaderLatest:
-
     def __init__(
         self,
         dir_path: str,
@@ -287,15 +286,15 @@ def _parse_context(
 
         elif lexicalType == 3:
             (pModule, offset) = safe_unpack("<QQ", meta_db, flex_offset)
-            frame["name"] = (
-                f"{self._parse_load_module(meta_db, pModule)['module_path']}:{offset}"
-            )
+            frame[
+                "name"
+            ] = f"{self._parse_load_module(meta_db, pModule)['module_path']}:{offset}"
 
         else:
             (pFile, line) = safe_unpack("<QL", meta_db, flex_offset)
-            frame["name"] = (
-                f"{self._parse_source_file(meta_db, pFile)['file_path']}:{line}"
-            )
+            frame[
+                "name"
+            ] = f"{self._parse_source_file(meta_db, pFile)['file_path']}:{line}"
 
         node = self._store_cct_node(ctxId, frame, parent, parent._depth + 1)
 
@@ -356,13 +355,13 @@ def _read_summary_profile(
             ] = value
 
             if self._metric_descriptions[metricId].endswith("(i)"):
-                self._inclusive_metrics[metricId] = (
-                    self._metric_descriptions[metricId]
-                )
+                self._inclusive_metrics[
+                    metricId
+                ] = self._metric_descriptions[metricId]
             else:
-                self._exclusive_metrics[metricId] = (
-                    self._metric_descriptions[metricId]
-                )
+                self._exclusive_metrics[
+                    metricId
+                ] = self._metric_descriptions[metricId]
 
     def _read_cct(
         self,
191 changes: 191 additions & 0 deletions hatchet/readers/perfflowaspect_reader.py
@@ -0,0 +1,191 @@
import json
import pandas as pd

import hatchet.graphframe
from hatchet.node import Node
from hatchet.graph import Graph
from hatchet.frame import Frame


class PerfFlowAspectReader:
    """Create a GraphFrame from PerfFlowAspect trace files.

    Return:
        (GraphFrame): graphframe containing the trace data
    """

    def __init__(self, filename, scan_memory=False, scan_cpu=False):
        """
        filename (str): Path to a PerfFlowAspect trace file.
        scan_memory (bool): Whether to include memory usage statistics.
        scan_cpu (bool): Whether to include CPU usage statistics.
        """
        self.scan_memory = scan_memory
        self.scan_cpu = scan_cpu
        with open(filename, "r") as file:
            raw = file.read()

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            fixed = self._repair_array_json(raw)
            try:
                data = json.loads(fixed)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"Trace file could not be parsed or repaired: {e}"
                ) from e

        if (
            isinstance(data, dict)
            and "traceEvents" in data
            and isinstance(data["traceEvents"], list)
        ):
            self.displayTimeUnit = data.get("displayTimeUnit")
            self.metadata = data.get("otherData", {})
            self.spec_dict = data["traceEvents"]
        elif isinstance(data, list):
            self.displayTimeUnit = None
            self.metadata = {}
            self.spec_dict = data
        else:
            raise ValueError("Trace must be either object or array format")

        # Fold verbose output (paired "B"/"E" events) into compact output
        # (single "X" events carrying an explicit duration).
        if self.spec_dict and self.spec_dict[0].get("ph") == "B":
            stack = []
            final = []
            for event in self.spec_dict:
                ph = event.get("ph")

                if ph == "B":
                    stack.append(event.copy())
                elif ph == "E":
                    if not stack:
                        continue
                    start = stack.pop()
                    merged = start
                    merged["dur"] = event["ts"] - start["ts"]
                    merged["ph"] = "X"
                    final.append(merged)
            self.spec_dict = final

    def _repair_array_json(self, text):
        """Best-effort repair of a truncated array-format trace."""
        text = text.rstrip()
        text = text.replace(",\n]", "\n]")
        if not text.endswith("]"):
            text += "]"
        if not text.lstrip().startswith("["):
            text = "[" + text
        return text

    def sort(self):
        # Sort spec_dict by the end time (ts + dur) of each function.
        self.spec_dict = sorted(
            self.spec_dict, key=lambda item: item["ts"] + item["dur"]
        )

    def read(self):
        roots = []
        node_mapping = {}  # Map node names to Node objects
        node_dicts = []
        usage_pairings = {}  # usage_pairings[ts] = (memory, cpu)

        # Error if an attempt is made to retrieve statistics,
        # but no statistics exist in the file.
        if all("C" not in item["ph"] for item in self.spec_dict) and (
            self.scan_cpu or self.scan_memory
        ):
            raise ValueError("No statistics in the provided file!")

        for item in self.spec_dict:
            # The following values always appear in a PerfFlowAspect log.
            name = item["name"]
            ts = item["ts"] * 1e-6  # convert to seconds
            ph = item["ph"]

            # These items may or may not appear.
            dur = None
            memory = 0
            cpu = 0

            # If this is a counter event, collect the statistics and match
            # them with the timestamp.
            if ph == "C":
                valid_statistic = False
                if self.scan_memory:
                    if item["args"]["memory_usage"] != 0:
                        memory = item["args"]["memory_usage"]
                        valid_statistic = True
                if self.scan_cpu:
                    if item["args"]["cpu_usage"] != 0.0:
                        cpu = item["args"]["cpu_usage"]
                        valid_statistic = True
                if valid_statistic:
                    usage_pairings[ts] = (memory, cpu)
                continue

            dur = item["dur"] * 1e-6

            # A Frame always consists of these values.
            frame_values = {"name": name, "type": "function", "ts": ts, "dur": dur}

            # Optionally, if logging statistics, insert memory and cpu usage
            # into the Frame (defaulting to 0 if no counter event shares
            # this timestamp).
            if self.scan_memory:
                memory = usage_pairings.get(ts, (0, 0))[0]
                frame_values["usage_memory"] = memory
            if self.scan_cpu:
                cpu = usage_pairings.get(ts, (0, 0))[1]
                frame_values["usage_cpu"] = cpu

            # Create a Frame and Node for the function.
            # Frame stores information about the node.
            # Node represents a node in the hierarchical graph structure.
            frame = Frame(frame_values)
            node = Node(frame, parent=None, hnid=-1)

            # Because events are ordered by end time, any existing roots
            # whose spans this node encloses are its children. Pop them
            # from the back rather than mutating roots while iterating
            # over it.
            while (
                roots
                and (ts < roots[-1].frame["ts"])
                and (ts + dur > roots[-1].frame["ts"] + roots[-1].frame["dur"])
            ):
                child = roots.pop()
                node.add_child(child)
                child.add_parent(node)
            roots.append(node)

            node_dict = {
                "node": node,
                "name": name,
                "ts": ts,
                "dur": dur,
                "pid": item["pid"],
                "tid": item["tid"],
                "ph": item["ph"],
            }
            if self.scan_memory:
                node_dict["usage_memory"] = memory
            if self.scan_cpu:
                node_dict["usage_cpu"] = cpu

            node_dicts.append(node_dict)

            # Store the Node object with its name for future reference.
            node_mapping[name] = node

        # Create the Graph object from the root nodes.
        graph = Graph(roots)
        graph.enumerate_traverse()

        dataframe = pd.DataFrame(data=node_dicts)
        dataframe.set_index(["node"], inplace=True)
        dataframe.sort_index(inplace=True)

        exc_metrics = []
        inc_metrics = []
        for col in dataframe.columns:
            if "(inc)" in col:
                inc_metrics.append(col)
            else:
                exc_metrics.append(col)

        return hatchet.graphframe.GraphFrame(
            graph, dataframe, exc_metrics, inc_metrics, metadata=self.metadata
        )
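To exercise the repair, folding, and tree-construction paths end to end, here is a small sketch that writes a truncated verbose trace (missing its closing bracket, as an interrupted run would leave behind) and reads it back; the event values are hypothetical:

```python
import tempfile

from hatchet.readers.perfflowaspect_reader import PerfFlowAspectReader

# A truncated array-format trace with no closing "]". The repair step
# should close it; the "B"/"E" pairs should fold into "X" events
# (foo: dur = 250 - 100 = 150 us; bar: dur = 200 - 120 = 80 us), and
# bar's span nests inside foo's, so foo becomes the sole root.
raw = (
    "[\n"
    '{"name": "foo", "ts": 100, "pid": 1, "tid": 1, "ph": "B"},\n'
    '{"name": "bar", "ts": 120, "pid": 1, "tid": 1, "ph": "B"},\n'
    '{"name": "bar", "ts": 200, "pid": 1, "tid": 1, "ph": "E"},\n'
    '{"name": "foo", "ts": 250, "pid": 1, "tid": 1, "ph": "E"}\n'
)

with tempfile.NamedTemporaryFile("w", suffix=".pfw", delete=False) as f:
    f.write(raw)
    path = f.name

gf = PerfFlowAspectReader(path).read()
print(len(gf.graph.roots))  # 1: foo, with bar as its child
print(gf.dataframe[["name", "ts", "dur"]])
```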
6 changes: 3 additions & 3 deletions hatchet/readers/timemory_reader.py
@@ -206,9 +206,9 @@ def match_labels_and_values(_metric_stats, _metric_label, _metric_type):
             # match with metric labels if _metric_stat item is a list.
             elif isinstance(_item, list):
                 for i in range(len(_item)):
-                    _ret["{}.{}{}".format(_key, _metric_label[i], _metric_type)] = (
-                        _item[i]
-                    )
+                    _ret[
+                        "{}.{}{}".format(_key, _metric_label[i], _metric_type)
+                    ] = _item[i]
             # check if _metric_stat item is not a dict or list
             else:
                 _ret["{}.{}{}".format(_key, _metric_label, _metric_type)] = _item