diff --git a/thicket/ensemble.py b/thicket/ensemble.py index 3e229357..b0d6d80e 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -369,12 +369,15 @@ def _handle_statsframe(): return combined_th @staticmethod - def _index(thickets, from_statsframes=False, disable_tqdm=False): + def _index( + thickets, from_statsframes=False, fill_perfdata=True, disable_tqdm=False + ): """Unify a list of thickets into a single thicket Arguments: thickets (list): list of Thicket objects from_statsframes (bool): Whether this method was invoked from from_statsframes + fill_perfdata (bool): whether to fill missing performance data with NaNs disable_tqdm (bool): whether to disable tqdm progress bar Returns: @@ -456,7 +459,8 @@ def _fill_perfdata(df, numerical_fill_value=np.nan): validate_dataframe(unify_df) # Insert missing rows in dataframe - unify_df = _fill_perfdata(unify_df) + if fill_perfdata: + unify_df = _fill_perfdata(unify_df) # Sort PerfData unify_df.sort_index(inplace=True) diff --git a/thicket/thicket.py b/thicket/thicket.py index 439e1324..3ae84645 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -203,7 +203,11 @@ def thicketize_graphframe(gf, prf): @staticmethod def from_caliper( - filename_or_stream, query=None, intersection=False, disable_tqdm=False + filename_or_stream, + query=None, + intersection=False, + fill_perfdata=True, + disable_tqdm=False, ): """Read in a Caliper .cali or .json file. @@ -212,36 +216,48 @@ def from_caliper( `.cali` or JSON-split format, or an open file object to read one query (str): cali-query in CalQL format intersection (bool): whether to perform intersection or union (default) + fill_perfdata (bool): whether to fill missing performance data with NaNs disable_tqdm (bool): whether to display tqdm progress bar """ return Thicket.reader_dispatch( GraphFrame.from_caliper, intersection, + fill_perfdata, disable_tqdm, filename_or_stream, query, ) @staticmethod - def from_hpctoolkit(dirname, intersection=False, disable_tqdm=False): + def from_hpctoolkit( + dirname, intersection=False, fill_perfdata=True, disable_tqdm=False + ): """Create a GraphFrame using hatchet's HPCToolkit reader and use its attributes to make a new thicket. Arguments: dirname (str): parent directory of an HPCToolkit experiment.xml file intersection (bool): whether to perform intersection or union (default) + fill_perfdata (bool): whether to fill missing performance data with NaNs disable_tqdm (bool): whether to display tqdm progress bar Returns: (thicket): new thicket containing HPCToolkit profile data """ return Thicket.reader_dispatch( - GraphFrame.from_hpctoolkit, intersection, disable_tqdm, dirname + GraphFrame.from_hpctoolkit, + intersection, + fill_perfdata, + disable_tqdm, + dirname, ) @staticmethod def from_caliperreader( - filename_or_caliperreader, intersection=False, disable_tqdm=False + filename_or_caliperreader, + intersection=False, + fill_perfdata=True, + disable_tqdm=False, ): """Helper function to read one caliper file. @@ -249,11 +265,13 @@ def from_caliperreader( filename_or_caliperreader (str or CaliperReader): name of a Caliper output file in `.cali` format, or a CaliperReader object intersection (bool): whether to perform intersection or union (default) + fill_perfdata (bool): whether to fill missing performance data with NaNs disable_tqdm (bool): whether to display tqdm progress bar """ return Thicket.reader_dispatch( GraphFrame.from_caliperreader, intersection, + fill_perfdata, disable_tqdm, filename_or_caliperreader, ) @@ -295,7 +313,9 @@ def from_literal(graph_dict): return tk @staticmethod - def reader_dispatch(func, intersection, disable_tqdm, *args, **kwargs): + def reader_dispatch( + func, intersection, fill_perfdata, disable_tqdm, *args, **kwargs + ): """Create a thicket from a list, directory of files, or a single file. Arguments: @@ -353,6 +373,7 @@ def reader_dispatch(func, intersection, disable_tqdm, *args, **kwargs): thickets=ens_list, axis="index", calltree=calltree, + fill_perfdata=fill_perfdata, disable_tqdm=disable_tqdm, ) @@ -372,6 +393,7 @@ def concat_thickets( calltree (str): calltree to use -> "union" or "intersection" Keyword Arguments: + fill_perfdata (bool): (if axis="index") Whether to fill missing performance data with NaNs headers (list): (if axis="columns") List of headers to use for the new columnar multi-index metadata_key (str): (if axis="columns") Name of the column from the metadata tables to replace the 'profile' index. If no argument is provided, it is assumed that there is no profile-wise @@ -381,10 +403,16 @@ def concat_thickets( (thicket): concatenated thicket """ - def _index(thickets, from_statsframes=False, disable_tqdm=disable_tqdm): + def _index( + thickets, + from_statsframes=False, + fill_perfdata=True, + disable_tqdm=disable_tqdm, + ): thicket_parts = Ensemble._index( thickets=thickets, from_statsframes=from_statsframes, + fill_perfdata=fill_perfdata, disable_tqdm=disable_tqdm, ) @@ -1112,6 +1140,12 @@ def filter_metadata(self, select_function): else: raise InvalidFilter("The argument passed to filter must be a callable.") + # If fill_perfdata is False, may need to squash + if len(new_thicket.graph) != len( + new_thicket.dataframe.index.get_level_values("node").unique() + ): + new_thicket = new_thicket.squash() + return new_thicket def filter(self, filter_func):