llnl · pearce8 · Jun 20, 2024 · Jun 10, 2024 · Jun 10, 2024 · Jun 11, 2024
diff --git a/thicket/ensemble.py b/thicket/ensemble.py
@@ -369,12 +369,15 @@ def _handle_statsframe():
         return combined_th
 
     @staticmethod
-    def _index(thickets, from_statsframes=False, disable_tqdm=False):
+    def _index(
+        thickets, from_statsframes=False, fill_perfdata=True, disable_tqdm=False
+    ):
         """Unify a list of thickets into a single thicket
 
         Arguments:
             thickets (list): list of Thicket objects
             from_statsframes (bool): Whether this method was invoked from from_statsframes
+            fill_perfdata (bool): whether to fill missing performance data with NaNs
             disable_tqdm (bool): whether to disable tqdm progress bar
 
         Returns:
@@ -456,7 +459,8 @@ def _fill_perfdata(df, numerical_fill_value=np.nan):
         validate_dataframe(unify_df)
 
         # Insert missing rows in dataframe
-        unify_df = _fill_perfdata(unify_df)
+        if fill_perfdata:
+            unify_df = _fill_perfdata(unify_df)
 
         # Sort PerfData
         unify_df.sort_index(inplace=True)

diff --git a/thicket/thicket.py b/thicket/thicket.py
@@ -203,7 +203,11 @@ def thicketize_graphframe(gf, prf):
 
     @staticmethod
     def from_caliper(
-        filename_or_stream, query=None, intersection=False, disable_tqdm=False
+        filename_or_stream,
+        query=None,
+        intersection=False,
+        fill_perfdata=True,
+        disable_tqdm=False,
     ):
         """Read in a Caliper .cali or .json file.
 
@@ -212,48 +216,62 @@ def from_caliper(
                 `.cali` or JSON-split format, or an open file object to read one
             query (str): cali-query in CalQL format
             intersection (bool): whether to perform intersection or union (default)
+            fill_perfdata (bool): whether to fill missing performance data with NaNs
             disable_tqdm (bool): whether to display tqdm progress bar
         """
         return Thicket.reader_dispatch(
             GraphFrame.from_caliper,
             intersection,
+            fill_perfdata,
             disable_tqdm,
             filename_or_stream,
             query,
         )
 
     @staticmethod
-    def from_hpctoolkit(dirname, intersection=False, disable_tqdm=False):
+    def from_hpctoolkit(
+        dirname, intersection=False, fill_perfdata=True, disable_tqdm=False
+    ):
         """Create a GraphFrame using hatchet's HPCToolkit reader and use its attributes
         to make a new thicket.
 
         Arguments:
             dirname (str): parent directory of an HPCToolkit experiment.xml file
             intersection (bool): whether to perform intersection or union (default)
+            fill_perfdata (bool): whether to fill missing performance data with NaNs
             disable_tqdm (bool): whether to display tqdm progress bar
 
         Returns:
             (thicket): new thicket containing HPCToolkit profile data
         """
         return Thicket.reader_dispatch(
-            GraphFrame.from_hpctoolkit, intersection, disable_tqdm, dirname
+            GraphFrame.from_hpctoolkit,
+            intersection,
+            fill_perfdata,
+            disable_tqdm,
+            dirname,
         )
 
     @staticmethod
     def from_caliperreader(
-        filename_or_caliperreader, intersection=False, disable_tqdm=False
+        filename_or_caliperreader,
+        intersection=False,
+        fill_perfdata=True,
+        disable_tqdm=False,
     ):
         """Helper function to read one caliper file.
 
         Arguments:
             filename_or_caliperreader (str or CaliperReader): name of a Caliper output
                 file in `.cali` format, or a CaliperReader object
             intersection (bool): whether to perform intersection or union (default)
+            fill_perfdata (bool): whether to fill missing performance data with NaNs
             disable_tqdm (bool): whether to display tqdm progress bar
         """
         return Thicket.reader_dispatch(
             GraphFrame.from_caliperreader,
             intersection,
+            fill_perfdata,
             disable_tqdm,
             filename_or_caliperreader,
         )
@@ -295,7 +313,9 @@ def from_literal(graph_dict):
         return tk
 
     @staticmethod
-    def reader_dispatch(func, intersection, disable_tqdm, *args, **kwargs):
+    def reader_dispatch(
+        func, intersection, fill_perfdata, disable_tqdm, *args, **kwargs
+    ):
         """Create a thicket from a list, directory of files, or a single file.
 
         Arguments:
@@ -353,6 +373,7 @@ def reader_dispatch(func, intersection, disable_tqdm, *args, **kwargs):
             thickets=ens_list,
             axis="index",
             calltree=calltree,
+            fill_perfdata=fill_perfdata,
             disable_tqdm=disable_tqdm,
         )
 
@@ -372,6 +393,7 @@ def concat_thickets(
             calltree (str): calltree to use -> "union" or "intersection"
 
         Keyword Arguments:
+            fill_perfdata (bool): (if axis="index") Whether to fill missing performance data with NaNs
             headers (list): (if axis="columns") List of headers to use for the new columnar multi-index
             metadata_key (str): (if axis="columns") Name of the column from the metadata tables to replace the 'profile'
                 index. If no argument is provided, it is assumed that there is no profile-wise
@@ -381,10 +403,16 @@ def concat_thickets(
             (thicket): concatenated thicket
         """
 
-        def _index(thickets, from_statsframes=False, disable_tqdm=disable_tqdm):
+        def _index(
+            thickets,
+            from_statsframes=False,
+            fill_perfdata=True,
+            disable_tqdm=disable_tqdm,
+        ):
             thicket_parts = Ensemble._index(
                 thickets=thickets,
                 from_statsframes=from_statsframes,
+                fill_perfdata=fill_perfdata,
                 disable_tqdm=disable_tqdm,
             )
 
@@ -1112,6 +1140,12 @@ def filter_metadata(self, select_function):
         else:
             raise InvalidFilter("The argument passed to filter must be a callable.")
 
+        # Graph/dataframe nodes can diverge if fill_perfdata=False; squash to resync
+        if len(new_thicket.graph) != len(
+            new_thicket.dataframe.index.get_level_values("node").unique()
+        ):
+            new_thicket = new_thicket.squash()
+
         return new_thicket
 
     def filter(self, filter_func):