diff --git a/hatchet/graphframe.py b/hatchet/graphframe.py index 70727d6c..7c9c136b 100644 --- a/hatchet/graphframe.py +++ b/hatchet/graphframe.py @@ -182,7 +182,7 @@ def from_hpctoolkit_latest( ).read() @staticmethod - def from_caliper(filename_or_stream, query=None): + def from_caliper(filename_or_stream, query=None, node_ordering=True): """Read in a Caliper .cali or .json file. Args: @@ -190,15 +190,19 @@ def from_caliper(filename_or_stream, query=None): file in `.cali` or JSON-split format, or an open file object to read one query (str): cali-query in CalQL format + node_ordering (bool): use node ordering (default to true) """ # import this lazily to avoid circular dependencies from .readers.caliper_reader import CaliperReader - return CaliperReader(filename_or_stream, query).read() + return CaliperReader(filename_or_stream, query, node_ordering).read() @staticmethod def from_caliperreader( - filename_or_caliperreader, native=False, string_attributes=[] + filename_or_caliperreader, + native=False, + string_attributes=[], + node_ordering=True, ): """Read in a native Caliper `cali` file using Caliper's python reader. @@ -208,12 +212,13 @@ def from_caliperreader( native (bool): use native or user-readable metric names (default) string_attributes (str or list, optional): Adds existing string attributes from within the caliper file to the dataframe + node_ordering (bool): use node ordering, defaults to true """ # import this lazily to avoid circular dependencies from .readers.caliper_native_reader import CaliperNativeReader return CaliperNativeReader( - filename_or_caliperreader, native, string_attributes + filename_or_caliperreader, native, string_attributes, node_ordering ).read() @staticmethod @@ -222,6 +227,7 @@ def from_timeseries( level="loop.start_iteration", native=False, string_attributes=[], + node_ordering=True, ): """Read in a native Caliper timeseries `cali` file using Caliper's python reader. 
@@ -236,7 +242,10 @@ def from_timeseries( from .readers.caliper_native_reader import CaliperNativeReader return CaliperNativeReader( - filename_or_caliperreader, native, string_attributes + filename_or_caliperreader, + native, + string_attributes, + node_ordering, ).read_timeseries(level=level) @staticmethod diff --git a/hatchet/readers/caliper_native_reader.py b/hatchet/readers/caliper_native_reader.py index 00ebe63f..e3e98214 100644 --- a/hatchet/readers/caliper_native_reader.py +++ b/hatchet/readers/caliper_native_reader.py @@ -45,7 +45,9 @@ class CaliperNativeReader: ), } - def __init__(self, filename_or_caliperreader, native, string_attributes): + def __init__( + self, filename_or_caliperreader, native, string_attributes, node_ordering + ): """Read in a native cali using Caliper's python reader. Args: @@ -53,6 +55,7 @@ def __init__(self, filename_or_caliperreader, native, string_attributes): a CaliperReader object native (bool): use native metric names or user-readable metric names string_attributes (str or list): Adds existing string attributes from within the caliper file to the dataframe + node_ordering (bool): if true, use node ordering """ self.filename_or_caliperreader = filename_or_caliperreader self.filename_ext = "" @@ -67,7 +70,7 @@ def __init__(self, filename_or_caliperreader, native, string_attributes): self.idx_to_node = {} self.callpath_to_idx = {} self.global_nid = 0 - self.node_ordering = False + self.node_ordering = node_ordering self.gf_list = [] self.timeseries_level = None @@ -322,8 +325,17 @@ def _create_parent(child_node, parent_callpath): if not hnode: # set the _hatchet_nid by the node order column if it exists, else -1 - if "min#min#aggregate.slot" in record: - self.node_ordering = True + if ( + self.node_ordering + and "min#min#aggregate.slot" not in record + ): + raise Exception( + "node ordering cannot be true if min#min#aggregate.slot is not in the record" + ) + elif ( + self.node_ordering + and "min#min#aggregate.slot" in record + ): order 
= record["min#min#aggregate.slot"] else: order = self.global_nid diff --git a/hatchet/readers/caliper_reader.py b/hatchet/readers/caliper_reader.py index e4a86f93..cfcfbee5 100644 --- a/hatchet/readers/caliper_reader.py +++ b/hatchet/readers/caliper_reader.py @@ -26,18 +26,19 @@ class CaliperReader: """Read in a Caliper file (`cali` or split JSON) or file-like object.""" - def __init__(self, filename_or_stream, query=""): + def __init__(self, filename_or_stream, query="", node_ordering=True): """Read from Caliper files (`cali` or split JSON). Args: filename_or_stream (str or file-like): name of a `cali` or `cali-query` split JSON file, OR an open file object query (str): cali-query arguments (for cali file) + node_ordering (bool): use node ordering (default: true) """ self.filename_or_stream = filename_or_stream self.filename_ext = "" self.query = query - self.node_ordering = False + self.node_ordering = node_ordering self.json_data = {} self.json_cols = {} @@ -155,10 +156,14 @@ def create_graph(self): self.idx_to_label[idx] = node_label if node["column"] == self.path_col_name: - # If there is a node orderering, assign to the _hatchet_nid - if "Node order" in self.json_cols: - self.node_ordering = True + # If there is a node ordering, assign to the _hatchet_nid + if self.node_ordering and "Node order" not in self.json_cols: + raise Exception( + "node ordering cannot be true if Node order is not in the json columns" + ) + elif self.node_ordering and "Node order" in self.json_cols: order = self.json_data[idx][0] + if "parent" not in node: # since this node does not have a parent, this is a root graph_root = Node( diff --git a/hatchet/tests/caliper.py b/hatchet/tests/caliper.py index 58fe7048..554fb465 100644 --- a/hatchet/tests/caliper.py +++ b/hatchet/tests/caliper.py @@ -49,7 +49,7 @@ def test_graphframe(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = 
GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) assert len(gf.dataframe.groupby("name")) == 24 @@ -80,7 +80,7 @@ def test_read_lulesh_json(lulesh_caliper_json): def test_calc_pi_json(calc_pi_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(calc_pi_caliper_json)) + gf = GraphFrame.from_caliper(str(calc_pi_caliper_json), node_ordering=False) assert len(gf.dataframe.groupby("name")) == 100 @@ -123,8 +123,8 @@ def test_lulesh_json_stream(lulesh_caliper_cali): @pytest.mark.skipif(sys.version_info > (3, 8), reason="Temporarily allow this to fail.") def test_filter_squash_unify_caliper_data(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json)) - gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) + gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) assert gf1.graph is not gf2.graph @@ -160,7 +160,7 @@ def test_filter_squash_unify_caliper_data(lulesh_caliper_json): def test_tree(monkeypatch, lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" monkeypatch.setattr("sys.stdout.isatty", (lambda: False)) - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) output = gf.tree(metric_column="time") assert "121489.000 main" in output @@ -175,7 +175,7 @@ def test_tree(monkeypatch, lulesh_caliper_json): def test_graphframe_to_literal(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) graph_literal = gf.to_literal() gf2 = GraphFrame.from_literal(graph_literal) @@ -830,7 +830,7 @@ def test_graphframe_squash_file_node_order(caliper_ordered_cali): def 
test_inclusive_time_calculation(lulesh_caliper_json): """Validate update_inclusive_columns() on known dataset containing per-rank data.""" - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) # save original time (inc) column for correctness check gf.dataframe["orig_inc_time"] = gf.dataframe["time (inc)"]