Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions hatchet/graphframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,23 +182,27 @@ def from_hpctoolkit_latest(
).read()

@staticmethod
def from_caliper(filename_or_stream, query=None):
def from_caliper(filename_or_stream, query=None, node_ordering=True):
"""Read in a Caliper .cali or .json file.

Args:
filename_or_stream (str or file-like): name of a Caliper output
file in `.cali` or JSON-split format, or an open file object
to read one
query (str): cali-query in CalQL format
node_ordering (bool): forwarded to CaliperReader; if true, the reader
assigns each node's order from the "Node order" column of the
data (defaults to True)
"""
# import this lazily to avoid circular dependencies
from .readers.caliper_reader import CaliperReader

return CaliperReader(filename_or_stream, query).read()
return CaliperReader(filename_or_stream, query, node_ordering).read()

@staticmethod
def from_caliperreader(
filename_or_caliperreader, native=False, string_attributes=[]
filename_or_caliperreader,
native=False,
string_attributes=[],
node_ordering=True,
):
"""Read in a native Caliper `cali` file using Caliper's python reader.

Expand All @@ -208,12 +212,13 @@ def from_caliperreader(
native (bool): use native or user-readable metric names (default)
string_attributes (str or list, optional): Adds existing string
attributes from within the caliper file to the dataframe
node_ordering (bool): use node ordering, defaults to true
"""
# import this lazily to avoid circular dependencies
from .readers.caliper_native_reader import CaliperNativeReader

return CaliperNativeReader(
filename_or_caliperreader, native, string_attributes
filename_or_caliperreader, native, string_attributes, node_ordering
).read()

@staticmethod
Expand All @@ -222,6 +227,7 @@ def from_timeseries(
level="loop.start_iteration",
native=False,
string_attributes=[],
node_ordering=True,
):
"""Read in a native Caliper timeseries `cali` file using Caliper's python reader.

Expand All @@ -236,7 +242,10 @@ def from_timeseries(
from .readers.caliper_native_reader import CaliperNativeReader

return CaliperNativeReader(
filename_or_caliperreader, native, string_attributes
filename_or_caliperreader,
native,
string_attributes,
node_ordering,
).read_timeseries(level=level)

@staticmethod
Expand Down
20 changes: 16 additions & 4 deletions hatchet/readers/caliper_native_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,17 @@ class CaliperNativeReader:
),
}

def __init__(self, filename_or_caliperreader, native, string_attributes):
def __init__(
self, filename_or_caliperreader, native, string_attributes, node_ordering
):
"""Read in a native cali using Caliper's python reader.

Args:
filename_or_caliperreader (str or CaliperReader): name of a `cali` file OR
a CaliperReader object
native (bool): use native metric names or user-readable metric names
string_attributes (str or list): Adds existing string attributes from within the caliper file to the dataframe
node_ordering (bool): if true, use node ordering
"""
self.filename_or_caliperreader = filename_or_caliperreader
self.filename_ext = ""
Expand All @@ -67,7 +70,7 @@ def __init__(self, filename_or_caliperreader, native, string_attributes):
self.idx_to_node = {}
self.callpath_to_idx = {}
self.global_nid = 0
self.node_ordering = False
self.node_ordering = node_ordering
self.gf_list = []
self.timeseries_level = None

Expand Down Expand Up @@ -322,8 +325,17 @@ def _create_parent(child_node, parent_callpath):

if not hnode:
# set the _hatchet_nid by the node order column if it exists, else -1
if "min#min#aggregate.slot" in record:
self.node_ordering = True
if (
    self.node_ordering
    and "min#min#aggregate.slot" not in record
):
    # Node ordering was requested, but this record carries no slot
    # column to take the order from. Fail loudly instead of leaving
    # `order` unset (the exception was previously built but never
    # raised, and the membership test was inverted).
    raise ValueError(
        "node ordering cannot be true if min#min#aggregate.slot is not in the record"
    )
elif self.node_ordering:
    # The branch above already rejected a missing slot column, so the
    # lookup is safe here.
    order = record["min#min#aggregate.slot"]
else:
    # Node ordering disabled: fall back to the reader's running node id.
    order = self.global_nid
Expand Down
15 changes: 10 additions & 5 deletions hatchet/readers/caliper_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,19 @@
class CaliperReader:
"""Read in a Caliper file (`cali` or split JSON) or file-like object."""

def __init__(self, filename_or_stream, query=""):
def __init__(self, filename_or_stream, query="", node_ordering=True):
"""Read from Caliper files (`cali` or split JSON).

Args:
filename_or_stream (str or file-like): name of a `cali` or
`cali-query` split JSON file, OR an open file object
query (str): cali-query arguments (for cali file)
node_ordering (bool): use node ordering (default: true)
"""
self.filename_or_stream = filename_or_stream
self.filename_ext = ""
self.query = query
self.node_ordering = False
self.node_ordering = node_ordering

self.json_data = {}
self.json_cols = {}
Expand Down Expand Up @@ -155,10 +156,14 @@ def create_graph(self):
self.idx_to_label[idx] = node_label

if node["column"] == self.path_col_name:
# If there is a node orderering, assign to the _hatchet_nid
if "Node order" in self.json_cols:
self.node_ordering = True
# If there is a node ordering, assign to the _hatchet_nid
if self.node_ordering and "Node order" not in self.json_cols:
    # Node ordering was requested, but the data has no "Node order"
    # column to read it from. Raise instead of silently continuing
    # with `order` unassigned (the exception was previously built but
    # never raised, and its message named the native reader's column).
    raise ValueError(
        "node ordering cannot be true if the Node order column is not in the data"
    )
elif self.node_ordering:
    # The branch above already rejected a missing "Node order" column.
    order = self.json_data[idx][0]

if "parent" not in node:
# since this node does not have a parent, this is a root
graph_root = Node(
Expand Down
14 changes: 7 additions & 7 deletions hatchet/tests/caliper.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

def test_graphframe(lulesh_caliper_json):
"""Sanity test a GraphFrame object with known data."""
gf = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)

assert len(gf.dataframe.groupby("name")) == 24

Expand Down Expand Up @@ -80,7 +80,7 @@ def test_read_lulesh_json(lulesh_caliper_json):

def test_calc_pi_json(calc_pi_caliper_json):
"""Sanity test a GraphFrame object with known data."""
gf = GraphFrame.from_caliper(str(calc_pi_caliper_json))
gf = GraphFrame.from_caliper(str(calc_pi_caliper_json), node_ordering=False)

assert len(gf.dataframe.groupby("name")) == 100

Expand Down Expand Up @@ -123,8 +123,8 @@ def test_lulesh_json_stream(lulesh_caliper_cali):
@pytest.mark.skipif(sys.version_info > (3, 8), reason="Temporarily allow this to fail.")
def test_filter_squash_unify_caliper_data(lulesh_caliper_json):
"""Sanity test a GraphFrame object with known data."""
gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)
gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)

assert gf1.graph is not gf2.graph

Expand Down Expand Up @@ -160,7 +160,7 @@ def test_filter_squash_unify_caliper_data(lulesh_caliper_json):
def test_tree(monkeypatch, lulesh_caliper_json):
"""Sanity test a GraphFrame object with known data."""
monkeypatch.setattr("sys.stdout.isatty", (lambda: False))
gf = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)
output = gf.tree(metric_column="time")

assert "121489.000 main" in output
Expand All @@ -175,7 +175,7 @@ def test_tree(monkeypatch, lulesh_caliper_json):

def test_graphframe_to_literal(lulesh_caliper_json):
"""Sanity test a GraphFrame object with known data."""
gf = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)
graph_literal = gf.to_literal()

gf2 = GraphFrame.from_literal(graph_literal)
Expand Down Expand Up @@ -830,7 +830,7 @@ def test_graphframe_squash_file_node_order(caliper_ordered_cali):

def test_inclusive_time_calculation(lulesh_caliper_json):
"""Validate update_inclusive_columns() on known dataset containing per-rank data."""
gf = GraphFrame.from_caliper(str(lulesh_caliper_json))
gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False)

# save original time (inc) column for correctness check
gf.dataframe["orig_inc_time"] = gf.dataframe["time (inc)"]
Expand Down