From c317c7835904d435d1d6de03fe75d080e2eb32cb Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 16 Aug 2023 15:55:12 -0700 Subject: [PATCH 01/57] check .cali file type in reader_dispatch() --- thicket/thicket.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 643650aa..8e602717 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -232,9 +232,11 @@ def reader_dispatch(func, intersection=False, *args, **kwargs): ens_list.append(Thicket.thicketize_graphframe(func(file), file)) # if directory of files elif os.path.isdir(obj): - for file in os.listdir(obj): - f = os.path.join(obj, file) - ens_list.append(Thicket.thicketize_graphframe(func(f), f)) + for root, dirs, files in os.walk(obj): + for file in files: + if file.endswith(".cali"): + f = os.path.join(root, file) + ens_list.append(Thicket.thicketize_graphframe(func(f), f)) # if single file elif os.path.isfile(obj): return Thicket.thicketize_graphframe(func(*args, **kwargs), args[0]) From 99467fb45f1aa6733ef56806e893e47ce57f4561 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 21 Aug 2023 17:53:03 -0700 Subject: [PATCH 02/57] updated produce_models() in model_extrap.py to enable measurement repetition support --- UPDATES.md | 4 + thicket/model_extrap.py | 215 +++++++++++++++++++++++----------------- 2 files changed, 130 insertions(+), 89 deletions(-) create mode 100644 UPDATES.md diff --git a/UPDATES.md b/UPDATES.md new file mode 100644 index 00000000..f0ca87af --- /dev/null +++ b/UPDATES.md @@ -0,0 +1,4 @@ +UPDATES: + +* added a check for .cali file type to reader_dispatch() so no value error is raised when other file types, folders are in the provided dir +* refactored the code in produce_models() so it can deal with measurement repetitions diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 9f779fb3..2d7e0e6b 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -11,9 +11,9 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from extrap.entities.experiment import ( - Experiment, -) # For some reason it errors if "Experiment" is not explicitly imported +#from extrap.entities.experiment import ( +# Experiment, +#) # For some reason it errors if "Experiment" is not explicitly imported from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator @@ -52,7 +52,10 @@ def display(self, RSS): # Scatter plot params = [ms.coordinate[0] for ms in measures_sorted] # X values - measures = [ms.value(True) for ms in measures_sorted] # Y values + median = [ms.median for ms in measures_sorted] + mean = [ms.mean for ms in measures_sorted] # Y values + mins = [ms.minimum for ms in measures_sorted] + maxes = [ms.maximum for ms in measures_sorted] # Line plot @@ -71,15 +74,23 @@ def display(self, RSS): ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) # Plot scatter - ax.plot(params, measures, "ro", label=self.mdl.callpath) + + errors = [ + np.subtract(mean, mins), + np.subtract(maxes, mean), + ] + + #ax.plot(params, measures, "ro", yerr=[min,maxs], label=self.mdl.callpath) + ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) + ax.plot(params, median, "+k", label="median") ax.set_xlabel(self.param_name) ax.set_ylabel(self.mdl.metric) if RSS: ax.text( x_vals[0], - max(y_vals + measures), - "RSS = " + self.mdl.hypothesis.RSS, + max(y_vals + mean), + "RSS = " + str(self.mdl.hypothesis.RSS), ) ax.legend(loc=1) @@ -108,11 +119,15 @@ def __init__(self, tht, param_name, params=None, chosen_metrics=None): """ self.tht = tht self.param_name = param_name + #debug + print("self.param_name:",self.param_name) # Assign param # Get params from metadata table if not params: self.params = self.tht.metadata[param_name].tolist() + # remove duplicates from list + self.params = list(set(self.params)) # params must be provided by the user else: if not isinstance(params, dict): @@ -194,93 +209,115 @@ def produce_models(self, agg_func=mean, add_stats=True): add_stats (bool): Option to add hypothesis function statistics to the aggregated statistics table """ - # Setup domain values one time. Have to match ordering with range - # values (i.e. performance data table profile ordering) - param_coords = [] # default coordinates for all profiles - + + # create an extra-p experiment + from extrap.entities.experiment import Experiment + experiment = Experiment() + + # create the model parameters + #NOTE: implementation does not work for multiple model parameters + from extrap.entities.parameter import Parameter + model_parameter = Parameter(self.param_name) + experiment.add_parameter(model_parameter) + # Mapping from metadata profiles to the parameter meta_param_mapping = self.tht.metadata[self.param_name].to_dict() - - # Flipped version of mapping dictionary - meta_param_mapping_flipped = dict( - [(value, key) for key, value in meta_param_mapping.items()] - ) - + # Ordering of profiles in the performance data table ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) - - # Append coordinates in order + + # create the measurement coordinates + #NOTE: implementation does not work for multiple model parameters + from extrap.entities.coordinate import Coordinate for profile in ensemble_profile_ordering: - param_coords.append( - xent.coordinate.Coordinate(float(meta_param_mapping[profile])) - ) - - # Iterate over nodes (outer index) - for node, single_node_df in self.tht.dataframe.groupby(level=0): - # Start experiment - exp = Experiment() - exp.add_parameter(xent.parameter.Parameter(self.param_name)) - - # For all chosen metrics - for met in self.chosen_metrics: - measures = [] - # Iterate over profiles (secondary index) - for profile, single_prof_df in single_node_df.groupby(level=1): - measures.append(single_prof_df[met].tolist()) - # Apply aggregation function to measurements - for i in range(len(measures)): - # Only apply aggregation function if more that one value - if isinstance(measures[i], list): - measures[i] = agg_func(measures[i]) - # Add coordinates (points at which measurements were taken) for the - # default case - if len(measures) == len(param_coords): - exp.coordinates.extend(param_coords) - # Handle case where profile(s) do not contain a measurement for the - # current node - elif len(measures) < len(param_coords): - param_coords_subset = [] - df_index = self.tht.dataframe.loc[node, met].index - for coord in param_coords: - profile = meta_param_mapping_flipped[coord._values[0]] - if df_index.isin([profile], level=0).any(): - param_coords_subset.append(coord) - else: - print( - "(Coordinate removed) Measurement at (" - + profile - + "): " - + meta_param_mapping[profile] - + "DNE for node " - + node - ) - exp.coordinates.extend(param_coords_subset) - - # Create metric object - metric_obj = xent.metric.Metric(met) - exp.add_metric(xent.metric.Metric(met)) - # Create callpath object and call tree - cpath = xent.callpath.Callpath(node.frame["name"]) - exp.add_callpath(cpath) - exp.call_tree = io_helper.create_call_tree(exp.callpaths) - # Add measurement objects to experiment - for coord, measurement in zip(exp.coordinates, measures): - measurement_obj = xent.measurement.Measurement( - coord, cpath, metric_obj, measurement - ) - exp.add_measurement(measurement_obj) - # Sanity check - io_helper.validate_experiment(exp) - # Generate model - model_gen = ModelGenerator(exp) - model_gen.model_all() - mkey = (cpath, metric_obj) - self.tht.statsframe.dataframe.at[node, met + MODEL_TAG] = ModelWrapper( - model_gen.models[mkey], self.param_name - ) - # Add statistics to aggregated statistics table - if add_stats: - self._add_extrap_statistics(node, met) + if Coordinate(float(meta_param_mapping[profile])) not in experiment.coordinates: + experiment.add_coordinate(Coordinate(float(meta_param_mapping[profile]))) + # debug + print("coordinates:",experiment.coordinates) + print("len coordinates:",len(experiment.coordinates)) + + # create the callpaths + #NOTE: could add calltree later on, possibly from hatchet data if available + from extrap.entities.callpath import Callpath + for node, _ in self.tht.dataframe.groupby(level=0): + if Callpath(node.frame["name"]) not in experiment.callpaths: + experiment.add_callpath(Callpath(node.frame["name"])) + # debug + print("Callpaths:",experiment.callpaths) + + # create the metrics + from extrap.entities.metric import Metric + for metric in self.chosen_metrics: + experiment.add_metric(Metric(metric)) + # debug + print("Metrics:",experiment.metrics) + + # iteratre over coordinates + for coordinate in experiment.coordinates: + # iterate over callpaths + for callpath in experiment.callpaths: + # iterate over metrics + for metric in experiment.metrics: + # iterate over measurements + #TODO: figure out how to access these group by data frames directly + # to remove these loops... + values = [] + for node, single_node_df in self.tht.dataframe.groupby(level=0): + if Callpath(node.frame["name"]) == callpath: + for profile, single_prof_df in single_node_df.groupby(level=1): + if Coordinate(float(meta_param_mapping[profile])) == coordinate: + # if no data is found for this config, do not anything + try: + value = single_prof_df[str(metric)].tolist() + if len(value) == 1: + values.append(value[0]) + except Exception as e: + print("Could not add measured value for:", + str(callpath), + str(metric), + str(coordinate), + ". See exception:", e) + from extrap.entities.measurement import Measurement + # if there was no data found at all for this config, do not add any measurement to the experiment + if len(values) > 0: + experiment.add_measurement(Measurement(coordinate, callpath, metric, values)) + else: + print("Could not add measurement values for:", + str(callpath), + str(metric), + str(coordinate), + ". No measured values found for this particular configuration.") + + # debug + print("DEBUG:", str(coordinate), str(callpath), str(metric), values) + + # debug + print("Measurements:",experiment.measurements) + + # create the calltree based on the callpaths + #TODO: could pip actual calltree in here... + from extrap.fileio.io_helper import create_call_tree + experiment.call_tree = create_call_tree(experiment.callpaths) + + # check the created experiment for its validty + io_helper.validate_experiment(experiment) + + # generate models using Extra-P model generator + model_gen = ModelGenerator(experiment) + model_gen.model_all() + + # add the models, and statistics into the dataframe + for callpath in experiment.callpaths: + for metric in experiment.metrics: + mkey = (callpath, metric) + for node, _ in self.tht.dataframe.groupby(level=0): + if Callpath(node.frame["name"]) == callpath: + self.tht.statsframe.dataframe.at[node, str(metric) + MODEL_TAG] = ModelWrapper( + model_gen.models[mkey], self.param_name + ) + # Add statistics to aggregated statistics table + if add_stats: + self._add_extrap_statistics(node, str(metric)) def _componentize_function(model_object): """Componentize one Extra-P modeling object into a dictionary of its parts From 30e3e3876588f454bce6304bab596491a21a7e6d Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 22 Aug 2023 10:29:52 -0700 Subject: [PATCH 03/57] added aggregation, scaling support for model creation --- thicket/model_extrap.py | 63 +++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 2d7e0e6b..7eff50ae 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -198,18 +198,29 @@ def _add_extrap_statistics(self, node, metric): node, metric + "_RE" + MODEL_TAG ] = hypothesis_fn.RE - def produce_models(self, agg_func=mean, add_stats=True): + def produce_models(self, use_median=True, scaling="weak", + scaling_parameter="jobsize", add_stats=True): """Produces an Extra-P model. Models are generated by calling Extra-P's ModelGenerator. Arguments: - agg_func (function): aggregation function to apply to multi-dimensional - measurement values. Extra-P v4.0.4 applies mean by default so that is - set here for clarity. + use_median (bool): Set how Extra-P aggregates repetitions of the same + measurement configuration. If set to True, Extra-P uses the median for + model creation, otherwise it uses the mean. (Default=True) + scaling (String): Set the scaling for Extra-P model creation. Use "weak" to + read in data as a weak scaling experiment. Use "strong" to read in data + as a strong scaling experiment. The strong scaling logic is only applied + to metrics that describe application performance per rank, such as + time/rank. (Default="weak") + scaling_parameter (String): Set the scaling parameter for the experiment scaling. + Only used when using strong scaling. One needs to provide either the name of + the parameter that models the resource allocation, e.g., the jobsize, or a + a fixed int value as a String, when only scaling, e.g., the problem size, and + the resource allocation is fix. (Default="jobsize") add_stats (bool): Option to add hypothesis function statistics to the - aggregated statistics table + aggregated statistics table. (Default=True) """ - + # create an extra-p experiment from extrap.entities.experiment import Experiment experiment = Experiment() @@ -270,7 +281,43 @@ def produce_models(self, agg_func=mean, add_stats=True): try: value = single_prof_df[str(metric)].tolist() if len(value) == 1: - values.append(value[0]) + + # when measurements contain strong scaling data + # convert the data into a weak scaling experiment + if scaling == "strong": + + # convert only data for metrics that are measured per rank + if "/rank" in str(metric): + print("str(metric):",str(metric)) + + # read out scaling parameter in case strong scaling is used + # if the resource allocation is static + if scaling_parameter.isnumeric(): + ranks = int(scaling_parameter) + print("ranks:",ranks) + # otherwise read number of ranks from the provided parameter + else: + # check if the parameter exists + if scaling_parameter in self.param_name: + parameter_id = [i for i,x in enumerate(experiment.parameters) if x == Parameter(scaling_parameter)][0] + print("ranks:",coordinate.__getitem__(parameter_id)) + ranks = coordinate.__getitem__(parameter_id) + # if the specified parameter does not exist + else: + raise Exception("Specified scaling parameter could not be found in the passed list of model parameters.") + + values.append(value[0] * ranks) + + # add values for all other metrics + else: + values.append(value[0]) + + # standard weak scaling + else: + values.append(value[0]) + + + except Exception as e: print("Could not add measured value for:", str(callpath), @@ -303,7 +350,7 @@ def produce_models(self, agg_func=mean, add_stats=True): io_helper.validate_experiment(experiment) # generate models using Extra-P model generator - model_gen = ModelGenerator(experiment) + model_gen = ModelGenerator(experiment, name="Default Model", use_median=use_median) model_gen.model_all() # add the models, and statistics into the dataframe From 33db8b62ef097a82fe8819fab58e4c1b3e1466a0 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 22 Aug 2023 17:23:12 -0700 Subject: [PATCH 04/57] added basic code for interactive plot --- thicket/model_extrap.py | 57 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 7eff50ae..71d51fc3 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -17,6 +17,7 @@ from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator +import copy MODEL_TAG = "_extrap-model" @@ -41,6 +42,15 @@ def eval(self, val): """Evaluate function (self) at val. f(val) = result""" return self.mdl.hypothesis.function.evaluate(val) + def simplify_function(self, model_function): + """Simplify the created model function so it is easier to read. Shortens coefficients to 3 decimals.""" + simplified_model_function = copy.deepcopy(model_function) + simplified_model_function.constant_coefficient = "{:.3f}".format(model_function.constant_coefficient) + for i in range(len(model_function.compound_terms)): + model_function.compound_terms[i].coefficient + simplified_model_function.compound_terms[i].coefficient = "{:.3f}".format(model_function.compound_terms[i].coefficient) + return simplified_model_function + def display(self, RSS): """Display function @@ -66,6 +76,9 @@ def display(self, RSS): # Y values y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] + + # for optimal scaling line + y_optimal_scaling = [mean[0] for x in x_vals] plt.ioff() fig, ax = plt.subplots() @@ -94,7 +107,49 @@ def display(self, RSS): ) ax.legend(loc=1) - return fig, ax + #return fig, ax + + model_function = self.mdl.hypothesis.function + model_function = str(self.simplify_function(model_function)) + + import plotly.graph_objects as go + + fig = go.Figure() + fig.add_trace(go.Scatter( + x=x_vals, y=y_vals, + name=model_function + )) + fig.add_trace(go.Scatter( + x=params, y=mean, + mode='markers', + name='mean', + error_y=dict( + type='data', + array=errors[0], + arrayminus=errors[1], + color='black', + thickness=1.5, + width=3, + ), + marker=dict(color='black', size=7, symbol="x") + )) + fig.add_trace(go.Scatter( + x=params, y=median, + mode='markers', + name='median', + marker=dict(color='black', size=7, symbol="triangle-up",) + )) + fig.add_trace(go.Scatter( + x=x_vals, y=y_optimal_scaling, + name="optimal scaling", + )) + fig.update_layout(template="plotly_white", + title=str(self.mdl.callpath)+"()", + xaxis_title=str(self.param_name), + yaxis_title=str(self.mdl.metric)) + #fig.show() + + return fig class Modeling: From 4640379985b2061101c3b9d0388b4b5507394e0a Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 24 Aug 2023 12:00:18 -0700 Subject: [PATCH 05/57] added exceptions for produce_models(), support for multi parameter modeling --- thicket/model_extrap.py | 446 +++++++++++++++++++++++++++------------- 1 file changed, 306 insertions(+), 140 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 71d51fc3..d48ff81e 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -16,6 +16,13 @@ #) # For some reason it errors if "Experiment" is not explicitly imported from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator +from extrap.entities.experiment import Experiment +from extrap.entities.parameter import Parameter +from extrap.fileio.io_helper import create_call_tree +from extrap.entities.measurement import Measurement +from extrap.entities.metric import Metric +from extrap.entities.callpath import Callpath +from extrap.entities.coordinate import Coordinate import copy @@ -51,11 +58,79 @@ def simplify_function(self, model_function): simplified_model_function.compound_terms[i].coefficient = "{:.3f}".format(model_function.compound_terms[i].coefficient) return simplified_model_function - def display(self, RSS): - """Display function + def display_measurements(self): + """_summary_ + """ + import numpy as np + import matplotlib.pyplot as plt + from matplotlib.widgets import Slider, Button - Arguments: - RSS (bool): whether to display Extra-P RSS on the plot + + # The parametrized function to be plotted + def f(t, amplitude, frequency): + return amplitude * np.sin(2 * np.pi * frequency * t) + + t = np.linspace(0, 1, 1000) + + # Define initial parameters + init_amplitude = 5 + init_frequency = 3 + + # Create the figure and the line that we will manipulate + fig, ax = plt.subplots() + line, = ax.plot(t, f(t, init_amplitude, init_frequency), lw=2) + ax.set_xlabel('Time [s]') + + # adjust the main plot to make room for the sliders + fig.subplots_adjust(left=0.25, bottom=0.25) + + # Make a horizontal slider to control the frequency. + axfreq = fig.add_axes([0.25, 0.1, 0.65, 0.03]) + freq_slider = Slider( + ax=axfreq, + label='Frequency [Hz]', + valmin=0.1, + valmax=30, + valinit=init_frequency, + ) + + # Make a vertically oriented slider to control the amplitude + axamp = fig.add_axes([0.1, 0.25, 0.0225, 0.63]) + amp_slider = Slider( + ax=axamp, + label="Amplitude", + valmin=0, + valmax=10, + valinit=init_amplitude, + orientation="vertical" + ) + + + # The function to be called anytime a slider's value changes + def update(val): + line.set_ydata(f(t, amp_slider.val, freq_slider.val)) + fig.canvas.draw_idle() + + + # register the update function with each slider + freq_slider.on_changed(update) + amp_slider.on_changed(update) + + # Create a `matplotlib.widgets.Button` to reset the sliders to initial values. + resetax = fig.add_axes([0.8, 0.025, 0.1, 0.04]) + button = Button(resetax, 'Reset', hovercolor='0.975') + + + def reset(event): + freq_slider.reset() + amp_slider.reset() + button.on_clicked(reset) + + #plt.show() + return fig + + def display_interactive(self): + """_summary_ """ # Sort based on x values measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) @@ -69,22 +144,35 @@ def display(self, RSS): # Line plot - # X value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.arange( - params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 - ) - - # Y values - y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] - - # for optimal scaling line - y_optimal_scaling = [mean[0] for x in x_vals] - plt.ioff() fig, ax = plt.subplots() + fig.subplots_adjust(bottom=0.2) + + # Function to plot graph + # according to expression + def visualizeGraph(x_max): + # X value plotting range. Dynamic based off what the largest/smallest values are + x_vals = np.arange( + params[0], float(x_max), (params[-1] - params[0]) / 100.0 + ) + # Y values + y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] + + # Plot line + l, = ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) + #l, = ax.plot(t, np.zeros_like(t), lw=2) + l.set_ydata(y_vals) + ax.relim() + ax.autoscale_view() + plt.draw() + + # Adding TextBox to graph + from matplotlib.widgets import TextBox + graphBox = fig.add_axes([0.1, 0.05, 0.8, 0.075]) + txtBox = TextBox(graphBox, "Max x: ") + txtBox.on_submit(visualizeGraph) + txtBox.set_val(str(1.5 * params[-1])) - # Plot line - ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) # Plot scatter @@ -99,18 +187,44 @@ def display(self, RSS): ax.set_xlabel(self.param_name) ax.set_ylabel(self.mdl.metric) - if RSS: - ax.text( - x_vals[0], - max(y_vals + mean), - "RSS = " + str(self.mdl.hypothesis.RSS), - ) ax.legend(loc=1) - #return fig, ax + return plt + + + def display_plotly(self): + """_summary_ + """ + # Sort based on x values + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + + # Scatter plot + params = [ms.coordinate[0] for ms in measures_sorted] # X values + median = [ms.median for ms in measures_sorted] + mean = [ms.mean for ms in measures_sorted] # Y values + mins = [ms.minimum for ms in measures_sorted] + maxes = [ms.maximum for ms in measures_sorted] + + # Line plot + + # X value plotting range. Dynamic based off what the largest/smallest values are + x_vals = np.arange( + params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 + ) + + # Y values + y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] model_function = self.mdl.hypothesis.function model_function = str(self.simplify_function(model_function)) + + # for optimal scaling line + y_optimal_scaling = [mean[0] for x in x_vals] + + errors = [ + np.subtract(mean, mins), + np.subtract(maxes, mean), + ] import plotly.graph_objects as go @@ -147,15 +261,69 @@ def display(self, RSS): title=str(self.mdl.callpath)+"()", xaxis_title=str(self.param_name), yaxis_title=str(self.mdl.metric)) - #fig.show() - return fig + def display(self, RSS): + """Display function + + Arguments: + RSS (bool): whether to display Extra-P RSS on the plot + """ + # Sort based on x values + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + + # Scatter plot + params = [ms.coordinate[0] for ms in measures_sorted] # X values + median = [ms.median for ms in measures_sorted] + mean = [ms.mean for ms in measures_sorted] # Y values + mins = [ms.minimum for ms in measures_sorted] + maxes = [ms.maximum for ms in measures_sorted] + + # Line plot + + # X value plotting range. Dynamic based off what the largest/smallest values are + x_vals = np.arange( + params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 + ) + + print("model:",self.mdl.hypothesis.function) + + # Y values + y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] + + plt.ioff() + fig, ax = plt.subplots() + + # Plot line + ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) + + # Plot scatter + + errors = [ + np.subtract(mean, mins), + np.subtract(maxes, mean), + ] + + #ax.plot(params, measures, "ro", yerr=[min,maxs], label=self.mdl.callpath) + ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) + ax.plot(params, median, "+k", label="median") + + ax.set_xlabel(self.param_name) + ax.set_ylabel(self.mdl.metric) + if RSS: + ax.text( + x_vals[0], + max(y_vals + mean), + "RSS = " + str(self.mdl.hypothesis.RSS), + ) + ax.legend(loc=1) + + return fig, ax class Modeling: """Produce models for all the metrics across the given graphframes.""" - def __init__(self, tht, param_name, params=None, chosen_metrics=None): + def __init__(self, tht, parameters=None, metrics=None): """Create a new model object. Adds a model column for each metric for each common frame across all the @@ -166,46 +334,26 @@ def __init__(self, tht, param_name, params=None, chosen_metrics=None): Arguments: tht (Thicket): thicket object - param_name (str): arbitrary if 'params' is being provided, otherwise name of - the metadata column from which 'params' will be extracted - params (list): parameters list, domain for the model - chosen_metrics (list): metrics to be evaluated in the model, range for the - model + parameters (list): A list of String values of the parameters that will be considered for + modeling by Extra-P. + metrics (list): A list of String value of the metrics Extra-P will create models for. """ self.tht = tht - self.param_name = param_name - #debug - print("self.param_name:",self.param_name) - - # Assign param - # Get params from metadata table - if not params: - self.params = self.tht.metadata[param_name].tolist() - # remove duplicates from list - self.params = list(set(self.params)) - # params must be provided by the user + + # if there were no parameters provided use the jobsize to create models, + # which should always be available + if not parameters: + self.parameters = ["jobsize"] else: - if not isinstance(params, dict): - raise TypeError("'params' must be provided as a dict") - elif len(params) != len(self.tht.profile): - raise ValueError( - "length of params must equal amount of profiles " - + len(params) - + "!= " - + len(self.tht.profile) - ) - profile_mapping_flipped = { - v: k for k, v in self.tht.profile_mapping.items() - } - for file_name, value in params.items(): - self.tht.metadata.at[ - profile_mapping_flipped[file_name], param_name - ] = value - self.params = tht.metadata[param_name].tolist() - if not chosen_metrics: - self.chosen_metrics = self.tht.exc_metrics + self.tht.inc_metrics + self.parameters = parameters + + # if no metrics have been provided create models for all existing metrics + if not metrics: + self.metrics = self.tht.exc_metrics + self.tht.inc_metrics else: - self.chosen_metrics = chosen_metrics + self.metrics = metrics + + self.experiment = None def to_html(self, RSS=False): def model_to_img_html(model_obj): @@ -217,12 +365,24 @@ def model_to_img_html(model_obj): imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr - - frm_dict = {met + MODEL_TAG: model_to_img_html for met in self.chosen_metrics} + + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + existing_metrics = [] + for callpath in self.experiment.callpaths: + for metric in self.experiment.metrics: + try: + self.experiment.modelers[0].models[(callpath, metric)] + if str(metric) not in existing_metrics: + existing_metrics.append(str(metric)) + except KeyError: + pass + + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected return self.tht.statsframe.dataframe[ - [met + MODEL_TAG for met in self.chosen_metrics] + [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) def _add_extrap_statistics(self, node, metric): @@ -253,7 +413,7 @@ def _add_extrap_statistics(self, node, metric): node, metric + "_RE" + MODEL_TAG ] = hypothesis_fn.RE - def produce_models(self, use_median=True, scaling="weak", + def produce_models(self, use_median=True, calc_total_metrics=False, scaling_parameter="jobsize", add_stats=True): """Produces an Extra-P model. Models are generated by calling Extra-P's ModelGenerator. @@ -262,49 +422,54 @@ def produce_models(self, use_median=True, scaling="weak", use_median (bool): Set how Extra-P aggregates repetitions of the same measurement configuration. If set to True, Extra-P uses the median for model creation, otherwise it uses the mean. (Default=True) - scaling (String): Set the scaling for Extra-P model creation. Use "weak" to - read in data as a weak scaling experiment. Use "strong" to read in data - as a strong scaling experiment. The strong scaling logic is only applied - to metrics that describe application performance per rank, such as - time/rank. (Default="weak") - scaling_parameter (String): Set the scaling parameter for the experiment scaling. - Only used when using strong scaling. One needs to provide either the name of - the parameter that models the resource allocation, e.g., the jobsize, or a - a fixed int value as a String, when only scaling, e.g., the problem size, and - the resource allocation is fix. (Default="jobsize") + calc_total_metrics (bool): Set calc_total_metrics to True to let Extra-P + internally calculate the total metric values for metrics measured + per MPI rank, e.g., the average runtime/rank. (Default=False) + scaling_parameter (String): Set the scaling parameter for the total metric + calculation. This parameter is only used when calc_total_metrics=True. + One needs to provide either the name of the parameter that models the + resource allocation, e.g., the jobsize, or a fixed int value as a String, + when only scaling, e.g., the problem size, and the resource allocation + is fix. (Default="jobsize") add_stats (bool): Option to add hypothesis function statistics to the aggregated statistics table. (Default=True) """ # create an extra-p experiment - from extrap.entities.experiment import Experiment experiment = Experiment() # create the model parameters - #NOTE: implementation does not work for multiple model parameters - from extrap.entities.parameter import Parameter - model_parameter = Parameter(self.param_name) - experiment.add_parameter(model_parameter) + for parameter in self.parameters: + experiment.add_parameter(Parameter(parameter)) + print("Parameters:",experiment.parameters) # Mapping from metadata profiles to the parameter - meta_param_mapping = self.tht.metadata[self.param_name].to_dict() + #meta_param_mapping = self.tht.metadata[self.parameters].to_dict() # Ordering of profiles in the performance data table ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + profile_parameter_value_mapping = {} + for profile in ensemble_profile_ordering: + profile_parameter_value_mapping[profile] = [] + + for parameter in self.parameters: + current_param_mapping = self.tht.metadata[parameter].to_dict() + for key, value in current_param_mapping.items(): + profile_parameter_value_mapping[key].append(float(value)) + + print("profile_parameter_value_mapping:",profile_parameter_value_mapping) + # create the measurement coordinates - #NOTE: implementation does not work for multiple model parameters - from extrap.entities.coordinate import Coordinate for profile in ensemble_profile_ordering: - if Coordinate(float(meta_param_mapping[profile])) not in experiment.coordinates: - experiment.add_coordinate(Coordinate(float(meta_param_mapping[profile]))) + if Coordinate(profile_parameter_value_mapping[profile]) not in experiment.coordinates: + experiment.add_coordinate(Coordinate(profile_parameter_value_mapping[profile])) # debug print("coordinates:",experiment.coordinates) print("len coordinates:",len(experiment.coordinates)) # create the callpaths #NOTE: could add calltree later on, possibly from hatchet data if available - from extrap.entities.callpath import Callpath for node, _ in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) not in experiment.callpaths: experiment.add_callpath(Callpath(node.frame["name"])) @@ -312,40 +477,44 @@ def produce_models(self, use_median=True, scaling="weak", print("Callpaths:",experiment.callpaths) # create the metrics - from extrap.entities.metric import Metric - for metric in self.chosen_metrics: + for metric in self.metrics: experiment.add_metric(Metric(metric)) # debug print("Metrics:",experiment.metrics) + #TODO: check what happens when there are not enough measurements to create a model with extrap + # iteratre over coordinates for coordinate in experiment.coordinates: # iterate over callpaths for callpath in experiment.callpaths: # iterate over metrics for metric in experiment.metrics: - # iterate over measurements - #TODO: figure out how to access these group by data frames directly - # to remove these loops... - values = [] - for node, single_node_df in self.tht.dataframe.groupby(level=0): - if Callpath(node.frame["name"]) == callpath: - for profile, single_prof_df in single_node_df.groupby(level=1): - if Coordinate(float(meta_param_mapping[profile])) == coordinate: - # if no data is found for this config, do not anything - try: - value = single_prof_df[str(metric)].tolist() + # iterate over the measured values in each profile + try: + values = [] + callpath_exists = False + #NOTE: potentially there is a better way to access the dataframes without looping + #NOTE: in addition it would be nice to have exceptions raised to let the user know + # when a callpath does not exist in a profile, not only if it does not exist at all + for node, single_node_df in self.tht.dataframe.groupby(level=0): + if Callpath(node.frame["name"]) == callpath: + callpath_exists = True + coordinate_exists = False + for profile, single_prof_df in single_node_df.groupby(level=1): + if Coordinate(profile_parameter_value_mapping[profile]) == coordinate: + coordinate_exists = True + try: + value = single_prof_df[str(metric)].tolist() + except Exception: + raise Exception("The metric \'"+str(metric)+"\' does not exist in the profile \'"+str(profile)+"\'.") if len(value) == 1: - - # when measurements contain strong scaling data - # convert the data into a weak scaling experiment - if scaling == "strong": - + # calculate total metric values + if calc_total_metrics == True: # convert only data for metrics that are measured per rank if "/rank" in str(metric): print("str(metric):",str(metric)) - - # read out scaling parameter in case strong scaling is used + # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): ranks = int(scaling_parameter) @@ -353,52 +522,41 @@ def produce_models(self, use_median=True, scaling="weak", # otherwise read number of ranks from the provided parameter else: # check if the parameter exists - if scaling_parameter in self.param_name: + if scaling_parameter in self.parameters: parameter_id = [i for i,x in enumerate(experiment.parameters) if x == Parameter(scaling_parameter)][0] print("ranks:",coordinate.__getitem__(parameter_id)) ranks = coordinate.__getitem__(parameter_id) # if the specified parameter does not exist else: - raise Exception("Specified scaling parameter could not be found in the passed list of model parameters.") - + raise Exception("The specified scaling parameter \'"+str(scaling_parameter)+"\' could not be found in the passed list of model parameters "+str(self.parameters)+".") values.append(value[0] * ranks) - # add values for all other metrics else: values.append(value[0]) - - # standard weak scaling + # standard use case, simply add measured values without manipulating them else: values.append(value[0]) - - - - except Exception as e: - print("Could not add measured value for:", - str(callpath), - str(metric), - str(coordinate), - ". See exception:", e) - from extrap.entities.measurement import Measurement + else: + raise Exception("There are no values recorded for the metric \'"+str(metric)+"\' in the profile \'"+str(profile)+"\'.") + if coordinate_exists == False: + raise Exception("The parameter value combintation \'"+str(coordinate)+"\' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.") + if callpath_exists == False: + raise Exception("The node/callpath \'"+str(callpath)+"\' does not exist in any of the profiles.") + except Exception as e: + print("WARNING: Could not create an Extra-P measurement for: callpath=\'"+str(callpath)+"\', metric=\'"+str(metric)+"\', coordinate=\'"+str(coordinate)+"\'. "+str(e)) + # if there was no data found at all for this config, do not add any measurement to the experiment if len(values) > 0: experiment.add_measurement(Measurement(coordinate, callpath, metric, values)) - else: - print("Could not add measurement values for:", - str(callpath), - str(metric), - str(coordinate), - ". No measured values found for this particular configuration.") - + # debug print("DEBUG:", str(coordinate), str(callpath), str(metric), values) # debug - print("Measurements:",experiment.measurements) + #print("Measurements:",experiment.measurements) # create the calltree based on the callpaths - #TODO: could pip actual calltree in here... - from extrap.fileio.io_helper import create_call_tree + #NOTE: could pipe actual calltree in here experiment.call_tree = create_call_tree(experiment.callpaths) # check the created experiment for its validty @@ -407,6 +565,7 @@ def produce_models(self, use_median=True, scaling="weak", # generate models using Extra-P model generator model_gen = ModelGenerator(experiment, name="Default Model", use_median=use_median) model_gen.model_all() + experiment.add_modeler(model_gen) # add the models, and statistics into the dataframe for callpath in experiment.callpaths: @@ -414,12 +573,19 @@ def produce_models(self, use_median=True, scaling="weak", mkey = (callpath, metric) for node, _ in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) == callpath: - self.tht.statsframe.dataframe.at[node, str(metric) + MODEL_TAG] = ModelWrapper( - model_gen.models[mkey], self.param_name - ) - # Add statistics to aggregated statistics table - if add_stats: - self._add_extrap_statistics(node, str(metric)) + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + try: + self.tht.statsframe.dataframe.at[node, str(metric) + MODEL_TAG] = ModelWrapper( + model_gen.models[mkey], self.parameters + ) + # Add statistics to aggregated statistics table + if add_stats: + self._add_extrap_statistics(node, str(metric)) + except Exception: + pass + + self.experiment = experiment def _componentize_function(model_object): """Componentize one Extra-P modeling object into a dictionary of its parts From 13e602d5d4618b875f10bc130ded65b93a2b8650 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 24 Aug 2023 15:16:23 -0700 Subject: [PATCH 06/57] added custom exception class for extra-p reader --- thicket/model_extrap.py | 42 ++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index d48ff81e..6c4ef86e 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -28,7 +28,25 @@ MODEL_TAG = "_extrap-model" +class ExtrapReaderException(Exception): + """Custom exception class for raising exceptions while reading in data from + a pandas type dataframe from a thicket object into the Extra-P experiment object. + Args: + Exception (Exception): Python base Exception object. + """ + + def __init__(self, message: str, profile: int) -> None: + """Initialization function for the custom Extra-P reader exception class. + + Args: + message (str): The message the exception should pass on. + profile (int): The hash of the profile that is currently read in as an int value. + """ + super().__init__() + self.message = message + self.profile = profile + class ModelWrapper: """Wrapper for an Extra-P model. @@ -482,8 +500,6 @@ def produce_models(self, use_median=True, calc_total_metrics=False, # debug print("Metrics:",experiment.metrics) - #TODO: check what happens when there are not enough measurements to create a model with extrap - # iteratre over coordinates for coordinate in experiment.coordinates: # iterate over callpaths @@ -494,20 +510,20 @@ def produce_models(self, use_median=True, calc_total_metrics=False, try: values = [] callpath_exists = False - #NOTE: potentially there is a better way to access the dataframes without looping - #NOTE: in addition it would be nice to have exceptions raised to let the user know - # when a callpath does not exist in a profile, not only if it does not exist at all + #NOTE: potentially there is a better way to access the dataframes without looping for node, single_node_df in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) == callpath: callpath_exists = True coordinate_exists = False for profile, single_prof_df in single_node_df.groupby(level=1): + if str(callpath) not in single_prof_df["name"].values: + raise ExtrapReaderException("The callpath \'"+str(callpath)+"\' does not exist in the profile \'"+str(profile)+"\'.", profile) if Coordinate(profile_parameter_value_mapping[profile]) == coordinate: coordinate_exists = True try: value = single_prof_df[str(metric)].tolist() except Exception: - raise Exception("The metric \'"+str(metric)+"\' does not exist in the profile \'"+str(profile)+"\'.") + raise ExtrapReaderException("The metric \'"+str(metric)+"\' does not exist in the profile \'"+str(profile)+"\'.", profile) if len(value) == 1: # calculate total metric values if calc_total_metrics == True: @@ -528,7 +544,7 @@ def produce_models(self, use_median=True, calc_total_metrics=False, ranks = coordinate.__getitem__(parameter_id) # if the specified parameter does not exist else: - raise Exception("The specified scaling parameter \'"+str(scaling_parameter)+"\' could not be found in the passed list of model parameters "+str(self.parameters)+".") + raise ExtrapReaderException("The specified scaling parameter \'"+str(scaling_parameter)+"\' could not be found in the passed list of model parameters "+str(self.parameters)+".", profile) values.append(value[0] * ranks) # add values for all other metrics else: @@ -537,14 +553,14 @@ def produce_models(self, use_median=True, calc_total_metrics=False, else: values.append(value[0]) else: - raise Exception("There are no values recorded for the metric \'"+str(metric)+"\' in the profile \'"+str(profile)+"\'.") + raise ExtrapReaderException("There are no values recorded for the metric \'"+str(metric)+"\' in the profile \'"+str(profile)+"\'.", profile) if coordinate_exists == False: - raise Exception("The parameter value combintation \'"+str(coordinate)+"\' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.") + raise ExtrapReaderException("The parameter value combintation \'"+str(coordinate)+"\' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.", profile) if callpath_exists == False: - raise Exception("The node/callpath \'"+str(callpath)+"\' does not exist in any of the profiles.") - except Exception as e: - print("WARNING: Could not create an Extra-P measurement for: callpath=\'"+str(callpath)+"\', metric=\'"+str(metric)+"\', coordinate=\'"+str(coordinate)+"\'. "+str(e)) - + raise ExtrapReaderException("The node/callpath \'"+str(callpath)+"\' does not exist in any of the profiles.", profile) + except ExtrapReaderException as e: + print("WARNING: Could not create an Extra-P measurement object for: callpath=\'"+str(callpath)+"\', metric=\'"+str(metric)+"\', coordinate=\'"+str(coordinate)+"\' from the profile: "+str(e.profile)+". "+str(e.message)) + # if there was no data found at all for this config, do not add any measurement to the experiment if len(values) > 0: experiment.add_measurement(Measurement(coordinate, callpath, metric, values)) From 83d94b084f1c426524c25c6d2c2aaec1ddf835a8 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 24 Aug 2023 15:22:15 -0700 Subject: [PATCH 07/57] code cleanup, removed debug code --- thicket/model_extrap.py | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 6c4ef86e..fc8a81c4 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -4,16 +4,13 @@ # SPDX-License-Identifier: MIT import base64 +import copy from io import BytesIO -from statistics import mean -import extrap.entities as xent import matplotlib.pyplot as plt import numpy as np import pandas as pd -#from extrap.entities.experiment import ( -# Experiment, -#) # For some reason it errors if "Experiment" is not explicitly imported + from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator from extrap.entities.experiment import Experiment @@ -24,8 +21,6 @@ from extrap.entities.callpath import Callpath from extrap.entities.coordinate import Coordinate -import copy - MODEL_TAG = "_extrap-model" class ExtrapReaderException(Exception): @@ -459,10 +454,6 @@ def produce_models(self, use_median=True, calc_total_metrics=False, # create the model parameters for parameter in self.parameters: experiment.add_parameter(Parameter(parameter)) - print("Parameters:",experiment.parameters) - - # Mapping from metadata profiles to the parameter - #meta_param_mapping = self.tht.metadata[self.parameters].to_dict() # Ordering of profiles in the performance data table ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) @@ -475,30 +466,21 @@ def produce_models(self, use_median=True, calc_total_metrics=False, current_param_mapping = self.tht.metadata[parameter].to_dict() for key, value in current_param_mapping.items(): profile_parameter_value_mapping[key].append(float(value)) - - print("profile_parameter_value_mapping:",profile_parameter_value_mapping) # create the measurement coordinates for profile in ensemble_profile_ordering: if Coordinate(profile_parameter_value_mapping[profile]) not in experiment.coordinates: experiment.add_coordinate(Coordinate(profile_parameter_value_mapping[profile])) - # debug - print("coordinates:",experiment.coordinates) - print("len coordinates:",len(experiment.coordinates)) - + # create the callpaths #NOTE: could add calltree later on, possibly from hatchet data if available for node, _ in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) not in experiment.callpaths: experiment.add_callpath(Callpath(node.frame["name"])) - # debug - print("Callpaths:",experiment.callpaths) # create the metrics for metric in self.metrics: experiment.add_metric(Metric(metric)) - # debug - print("Metrics:",experiment.metrics) # iteratre over coordinates for coordinate in experiment.coordinates: @@ -529,18 +511,15 @@ def produce_models(self, use_median=True, calc_total_metrics=False, if calc_total_metrics == True: # convert only data for metrics that are measured per rank if "/rank" in str(metric): - print("str(metric):",str(metric)) # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): ranks = int(scaling_parameter) - print("ranks:",ranks) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists if scaling_parameter in self.parameters: parameter_id = [i for i,x in enumerate(experiment.parameters) if x == Parameter(scaling_parameter)][0] - print("ranks:",coordinate.__getitem__(parameter_id)) ranks = coordinate.__getitem__(parameter_id) # if the specified parameter does not exist else: @@ -564,12 +543,6 @@ def produce_models(self, use_median=True, calc_total_metrics=False, # if there was no data found at all for this config, do not add any measurement to the experiment if len(values) > 0: experiment.add_measurement(Measurement(coordinate, callpath, metric, values)) - - # debug - print("DEBUG:", str(coordinate), str(callpath), str(metric), values) - - # debug - #print("Measurements:",experiment.measurements) # create the calltree based on the callpaths #NOTE: could pipe actual calltree in here From e9ff10135e946b288f5a26d8d622dc161c49772e Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 24 Aug 2023 17:27:55 -0700 Subject: [PATCH 08/57] added basic plotting support for 2 parameters --- thicket/model_extrap.py | 157 ++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 77 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index fc8a81c4..e4618c7e 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -50,9 +50,9 @@ class ModelWrapper: the model. """ - def __init__(self, mdl, param_name): + def __init__(self, mdl, parameters): self.mdl = mdl - self.param_name = param_name # Needed for plotting / displaying the model + self.parameters = parameters def __str__(self): """Display self as a function""" @@ -198,90 +198,19 @@ def visualizeGraph(x_max): ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) ax.plot(params, median, "+k", label="median") - ax.set_xlabel(self.param_name) + ax.set_xlabel(self.parameters) ax.set_ylabel(self.mdl.metric) ax.legend(loc=1) return plt - - def display_plotly(self): - """_summary_ - """ - # Sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) - - # Scatter plot - params = [ms.coordinate[0] for ms in measures_sorted] # X values - median = [ms.median for ms in measures_sorted] - mean = [ms.mean for ms in measures_sorted] # Y values - mins = [ms.minimum for ms in measures_sorted] - maxes = [ms.maximum for ms in measures_sorted] - - # Line plot - - # X value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.arange( - params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 - ) - - # Y values - y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] - - model_function = self.mdl.hypothesis.function - model_function = str(self.simplify_function(model_function)) - - # for optimal scaling line - y_optimal_scaling = [mean[0] for x in x_vals] - - errors = [ - np.subtract(mean, mins), - np.subtract(maxes, mean), - ] - - import plotly.graph_objects as go - - fig = go.Figure() - fig.add_trace(go.Scatter( - x=x_vals, y=y_vals, - name=model_function - )) - fig.add_trace(go.Scatter( - x=params, y=mean, - mode='markers', - name='mean', - error_y=dict( - type='data', - array=errors[0], - arrayminus=errors[1], - color='black', - thickness=1.5, - width=3, - ), - marker=dict(color='black', size=7, symbol="x") - )) - fig.add_trace(go.Scatter( - x=params, y=median, - mode='markers', - name='median', - marker=dict(color='black', size=7, symbol="triangle-up",) - )) - fig.add_trace(go.Scatter( - x=x_vals, y=y_optimal_scaling, - name="optimal scaling", - )) - fig.update_layout(template="plotly_white", - title=str(self.mdl.callpath)+"()", - xaxis_title=str(self.param_name), - yaxis_title=str(self.mdl.metric)) - return fig - - def display(self, RSS): + def display_one_parameter_model(self, RSS): """Display function Arguments: RSS (bool): whether to display Extra-P RSS on the plot """ + # Sort based on x values measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) @@ -321,7 +250,7 @@ def display(self, RSS): ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) ax.plot(params, median, "+k", label="median") - ax.set_xlabel(self.param_name) + ax.set_xlabel(self.parameters[0]) ax.set_ylabel(self.mdl.metric) if RSS: ax.text( @@ -332,7 +261,81 @@ def display(self, RSS): ax.legend(loc=1) return fig, ax + + def display_two_parameter_model(self, RSS): + """Display function + + Arguments: + RSS (bool): whether to display Extra-P RSS on the plot + """ + + # Sort based on x and y values + measures_sorted = sorted(self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1])) + + # Scatter plot + X_params = [ms.coordinate[0] for ms in measures_sorted] # X values + Y_params = [ms.coordinate[1] for ms in measures_sorted] # Y values + + # Z values + medians = [ms.median for ms in measures_sorted] + means = [ms.mean for ms in measures_sorted] + mins = [ms.minimum for ms in measures_sorted] + maxes = [ms.maximum for ms in measures_sorted] + + # Surface plot + # X value plotting range. Dynamic based off what the largest/smallest values are + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100 + ) + # Y value plotting range. Dynamic based off what the largest/smallest values are + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100 + ) + + #print("model:",self.mdl.hypothesis.function) + + x_vals, y_vals = np.meshgrid(x_vals, y_vals) + + z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) + + zuplims = np.subtract(means, mins) + zlolims = np.subtract(maxes, means) + + plt.ioff() + + fig = plt.figure() + ax = fig.gca(projection='3d') + + axis_lengths = [len(x_vals), len(y_vals), len(z_vals)] + + # plot model as surface plot + ax.plot_surface(x_vals, y_vals, z_vals, label=self.mdl.hypothesis.function) + + # plot the measurement points + #ax.scatter(X_params, Y_params, medians, fmt=".k", label=self.mdl.callpath) + #ax.errorbar(X_params, Y_params, means, zuplims=zuplims, zlolims=zlolims, fmt=".k", label=self.mdl.callpath) + + return fig, ax + def display(self, RSS): + """Display function + + Arguments: + RSS (bool): whether to display Extra-P RSS on the plot + """ + + # check number of model parameters + if len(self.parameters) == 1: + fig, ax = self.display_one_parameter_model(RSS) + + elif len(self.parameters) == 2: + fig, ax = self.display_two_parameter_model(RSS) + + else: + raise Exception("Plotting performance models with "+str(len(self.parameters))+" parameters is currently not supported.") + + return fig, ax + class Modeling: """Produce models for all the metrics across the given graphframes.""" From 400a45302a589208063acc9ca5eb1877c6fbbf7f Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 25 Aug 2023 19:32:14 -0700 Subject: [PATCH 09/57] improved 2 parameter model visualization --- thicket/model_extrap.py | 80 ++++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index e4618c7e..80a66301 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -8,6 +8,8 @@ from io import BytesIO import matplotlib.pyplot as plt +from matplotlib import patches as mpatches +import matplotlib.lines as mlines import numpy as np import pandas as pd @@ -262,6 +264,30 @@ def display_one_parameter_model(self, RSS): return fig, ax + def draw_legend(self, ax_all, dict_callpath_color): + # draw legend + handles = list() + for key, value in dict_callpath_color.items(): + labelName = str(key) + if value[0] == "surface": + patch = mpatches.Patch(color=value[1], label=labelName) + handles.append(patch) + elif value[0] == "mean": + mark = mlines.Line2D([], [], color=value[1], marker='+', linestyle='None', + markersize=10, label=labelName) + handles.append(mark) + elif value[0] == "median": + mark = mlines.Line2D([], [], color=value[1], marker='x', linestyle='None', + markersize=10, label=labelName) + handles.append(mark) + elif value[0] == "min" or value[0] == "max": + mark = mlines.Line2D([], [], color=value[1], marker='_', linestyle='None', + markersize=10, label=labelName) + handles.append(mark) + + ax_all.legend(handles=handles, + loc="upper right", bbox_to_anchor=(2.5, 1)) + def display_two_parameter_model(self, RSS): """Display function @@ -269,6 +295,10 @@ def display_two_parameter_model(self, RSS): RSS (bool): whether to display Extra-P RSS on the plot """ + #TODO: add parameters to display mean, median, min, max + # optiomal scaling surface + # change plot opacity based on if measurements are displayed + # Sort based on x and y values measures_sorted = sorted(self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1])) @@ -291,29 +321,53 @@ def display_two_parameter_model(self, RSS): y_vals = np.linspace( start=Y_params[0], stop=1.5 * Y_params[-1], num=100 ) - - #print("model:",self.mdl.hypothesis.function) - + x_vals, y_vals = np.meshgrid(x_vals, y_vals) - z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) - zuplims = np.subtract(means, mins) - zlolims = np.subtract(maxes, means) - plt.ioff() fig = plt.figure() ax = fig.gca(projection='3d') - - axis_lengths = [len(x_vals), len(y_vals), len(z_vals)] - + # plot model as surface plot - ax.plot_surface(x_vals, y_vals, z_vals, label=self.mdl.hypothesis.function) + ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), + rstride=1, cstride=1, antialiased=False, alpha=0.1) + + #rstride=1, cstride=1, antialiased=False, alpha=0.1 # plot the measurement points - #ax.scatter(X_params, Y_params, medians, fmt=".k", label=self.mdl.callpath) - #ax.errorbar(X_params, Y_params, means, zuplims=zuplims, zlolims=zlolims, fmt=".k", label=self.mdl.callpath) + ax.scatter(X_params, Y_params, medians, c="black", marker="x", label="median") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + + # Draw connecting line for min, max -> error bars + line_x, line_y, line_z = [], [], [] + for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): + line_x.append(x), line_x.append(x) + line_y.append(y), line_y.append(y) + line_z.append(min_v), line_z.append(max_v) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + ax.plot(line_x, line_y, line_z, color="black") + + #ax.legend() + ax.set_xlabel(self.parameters[0]) + ax.set_ylabel(self.parameters[1]) + ax.set_zlabel(self.mdl.metric) + ax.set_title(str(self.mdl.callpath)+"()") + + dict_callpath_color = {} + #TODO: need to convert that model into a python evaluable function + #TODO: convert 0.00001234 to 1.234*10^2... + simple_function = self.simplify_function(self.mdl.hypothesis.function) + dict_callpath_color[str(simple_function)] = ["surface", "blue"] + dict_callpath_color["mean"] = ["mean", "black"] + dict_callpath_color["median"] = ["median", "black"] + dict_callpath_color["min"] = ["min", "black"] + dict_callpath_color["max"] = ["max", "black"] + + self.draw_legend(ax, dict_callpath_color) return fig, ax From ae2a6d9f2d7637691b52fbb4e701f215a39d0aec Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 25 Aug 2023 23:12:24 -0700 Subject: [PATCH 10/57] added parameters to control output of display function --- thicket/model_extrap.py | 152 +++++++++++++--------------------------- 1 file changed, 49 insertions(+), 103 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 80a66301..f11b4325 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -10,6 +10,7 @@ import matplotlib.pyplot as plt from matplotlib import patches as mpatches import matplotlib.lines as mlines +import matplotlib.ticker as mticker import numpy as np import pandas as pd @@ -64,85 +65,13 @@ def eval(self, val): """Evaluate function (self) at val. f(val) = result""" return self.mdl.hypothesis.function.evaluate(val) - def simplify_function(self, model_function): + def simplify_coefficients(self, model_function): """Simplify the created model function so it is easier to read. Shortens coefficients to 3 decimals.""" simplified_model_function = copy.deepcopy(model_function) - simplified_model_function.constant_coefficient = "{:.3f}".format(model_function.constant_coefficient) + simplified_model_function.constant_coefficient = "{:.3E}".format(model_function.constant_coefficient) for i in range(len(model_function.compound_terms)): - model_function.compound_terms[i].coefficient - simplified_model_function.compound_terms[i].coefficient = "{:.3f}".format(model_function.compound_terms[i].coefficient) + simplified_model_function.compound_terms[i].coefficient = "{:.3E}".format(model_function.compound_terms[i].coefficient) return simplified_model_function - - def display_measurements(self): - """_summary_ - """ - import numpy as np - import matplotlib.pyplot as plt - from matplotlib.widgets import Slider, Button - - - # The parametrized function to be plotted - def f(t, amplitude, frequency): - return amplitude * np.sin(2 * np.pi * frequency * t) - - t = np.linspace(0, 1, 1000) - - # Define initial parameters - init_amplitude = 5 - init_frequency = 3 - - # Create the figure and the line that we will manipulate - fig, ax = plt.subplots() - line, = ax.plot(t, f(t, init_amplitude, init_frequency), lw=2) - ax.set_xlabel('Time [s]') - - # adjust the main plot to make room for the sliders - fig.subplots_adjust(left=0.25, bottom=0.25) - - # Make a horizontal slider to control the frequency. - axfreq = fig.add_axes([0.25, 0.1, 0.65, 0.03]) - freq_slider = Slider( - ax=axfreq, - label='Frequency [Hz]', - valmin=0.1, - valmax=30, - valinit=init_frequency, - ) - - # Make a vertically oriented slider to control the amplitude - axamp = fig.add_axes([0.1, 0.25, 0.0225, 0.63]) - amp_slider = Slider( - ax=axamp, - label="Amplitude", - valmin=0, - valmax=10, - valinit=init_amplitude, - orientation="vertical" - ) - - - # The function to be called anytime a slider's value changes - def update(val): - line.set_ydata(f(t, amp_slider.val, freq_slider.val)) - fig.canvas.draw_idle() - - - # register the update function with each slider - freq_slider.on_changed(update) - amp_slider.on_changed(update) - - # Create a `matplotlib.widgets.Button` to reset the sliders to initial values. - resetax = fig.add_axes([0.8, 0.025, 0.1, 0.04]) - button = Button(resetax, 'Reset', hovercolor='0.975') - - - def reset(event): - freq_slider.reset() - amp_slider.reset() - button.on_clicked(reset) - - #plt.show() - return fig def display_interactive(self): """_summary_ @@ -206,7 +135,8 @@ def visualizeGraph(x_max): return plt - def display_one_parameter_model(self, RSS): + def display_one_parameter_model(self, show_mean=False, show_median=False, + show_min_max=False, RSS=False): """Display function Arguments: @@ -231,6 +161,17 @@ def display_one_parameter_model(self, RSS): ) print("model:",self.mdl.hypothesis.function) + + temp = str(self.simplify_coefficients(self.mdl.hypothesis.function)) + temp = temp.replace("+-", "-") + temp = temp.replace("*", "\cdot") + temp = temp.replace("(", "{") + temp = temp.replace(")", "}") + temp = temp.replace("log2{p}", "\log_2(p)") + temp = "$" + temp + "$" + + + print("new function:",temp) # Y values y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] @@ -239,7 +180,7 @@ def display_one_parameter_model(self, RSS): fig, ax = plt.subplots() # Plot line - ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) + ax.plot(x_vals, y_vals, label=temp) # Plot scatter @@ -252,7 +193,7 @@ def display_one_parameter_model(self, RSS): ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) ax.plot(params, median, "+k", label="median") - ax.set_xlabel(self.parameters[0]) + ax.set_xlabel(self.parameters[0] + " $p$") ax.set_ylabel(self.mdl.metric) if RSS: ax.text( @@ -288,7 +229,8 @@ def draw_legend(self, ax_all, dict_callpath_color): ax_all.legend(handles=handles, loc="upper right", bbox_to_anchor=(2.5, 1)) - def display_two_parameter_model(self, RSS): + def display_two_parameter_model(self, show_mean=False, show_median=False, + show_min_max=False, RSS=False): """Display function Arguments: @@ -298,6 +240,7 @@ def display_two_parameter_model(self, RSS): #TODO: add parameters to display mean, median, min, max # optiomal scaling surface # change plot opacity based on if measurements are displayed + # write function with real math script in legend # Sort based on x and y values measures_sorted = sorted(self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1])) @@ -336,42 +279,45 @@ def display_two_parameter_model(self, RSS): #rstride=1, cstride=1, antialiased=False, alpha=0.1 - # plot the measurement points - ax.scatter(X_params, Y_params, medians, c="black", marker="x", label="median") - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") - - # Draw connecting line for min, max -> error bars - line_x, line_y, line_z = [], [], [] - for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): - line_x.append(x), line_x.append(x) - line_y.append(y), line_y.append(y) - line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) - ax.plot(line_x, line_y, line_z, color="black") + # plot the measurement points if options selected + if show_median: + ax.scatter(X_params, Y_params, medians, c="black", marker="x", label="median") + if show_mean: + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + if show_min_max: + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + # Draw connecting line for min, max -> error bars + line_x, line_y, line_z = [], [], [] + for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): + line_x.append(x), line_x.append(x) + line_y.append(y), line_y.append(y) + line_z.append(min_v), line_z.append(max_v) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + ax.plot(line_x, line_y, line_z, color="black") - #ax.legend() ax.set_xlabel(self.parameters[0]) ax.set_ylabel(self.parameters[1]) ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath)+"()") dict_callpath_color = {} - #TODO: need to convert that model into a python evaluable function - #TODO: convert 0.00001234 to 1.234*10^2... simple_function = self.simplify_function(self.mdl.hypothesis.function) dict_callpath_color[str(simple_function)] = ["surface", "blue"] - dict_callpath_color["mean"] = ["mean", "black"] - dict_callpath_color["median"] = ["median", "black"] - dict_callpath_color["min"] = ["min", "black"] - dict_callpath_color["max"] = ["max", "black"] + if show_mean: + dict_callpath_color["mean"] = ["mean", "black"] + if show_median: + dict_callpath_color["median"] = ["median", "black"] + if show_min_max: + dict_callpath_color["min"] = ["min", "black"] + dict_callpath_color["max"] = ["max", "black"] self.draw_legend(ax, dict_callpath_color) return fig, ax - def display(self, RSS): + def display(self, show_mean=False, show_median=False, + show_min_max=False, RSS=False): """Display function Arguments: @@ -380,10 +326,10 @@ def display(self, RSS): # check number of model parameters if len(self.parameters) == 1: - fig, ax = self.display_one_parameter_model(RSS) + fig, ax = self.display_one_parameter_model(show_mean, show_median, show_min_max, RSS) elif len(self.parameters) == 2: - fig, ax = self.display_two_parameter_model(RSS) + fig, ax = self.display_two_parameter_model(show_mean, show_median, show_min_max, RSS) else: raise Exception("Plotting performance models with "+str(len(self.parameters))+" parameters is currently not supported.") From ce04201558e9adce2937d6327da36dfb78ea5bb6 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 28 Aug 2023 17:38:05 -0700 Subject: [PATCH 11/57] added plotting options for 1,2 parameters vizs --- thicket/model_extrap.py | 404 ++++++++++++++++++++++++++-------------- 1 file changed, 261 insertions(+), 143 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index f11b4325..3ffb3cf8 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -54,159 +54,209 @@ class ModelWrapper: """ def __init__(self, mdl, parameters): + """Init function of the ModelWrapper class. + + Args: + mdl (Extra-P Model): An Extra-P model object. + parameters (list): A list of string parameters that will be considered for modeling. + """ self.mdl = mdl self.parameters = parameters - def __str__(self): - """Display self as a function""" - return str(self.mdl.hypothesis.function) - - def eval(self, val): - """Evaluate function (self) at val. f(val) = result""" - return self.mdl.hypothesis.function.evaluate(val) + def __str__(self) -> str: + """Returns the Extra-P performance model function as a string. - def simplify_coefficients(self, model_function): - """Simplify the created model function so it is easier to read. Shortens coefficients to 3 decimals.""" - simplified_model_function = copy.deepcopy(model_function) - simplified_model_function.constant_coefficient = "{:.3E}".format(model_function.constant_coefficient) - for i in range(len(model_function.compound_terms)): - simplified_model_function.compound_terms[i].coefficient = "{:.3E}".format(model_function.compound_terms[i].coefficient) - return simplified_model_function - - def display_interactive(self): - """_summary_ + Returns: + str: The Extra-P performance model function. """ - # Sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + return str(self.mdl.hypothesis.function) - # Scatter plot - params = [ms.coordinate[0] for ms in measures_sorted] # X values - median = [ms.median for ms in measures_sorted] - mean = [ms.mean for ms in measures_sorted] # Y values - mins = [ms.minimum for ms in measures_sorted] - maxes = [ms.maximum for ms in measures_sorted] + def eval(self, val: float) -> float: + """Evaluates the performance model function using a given value and returns the result. - # Line plot + Args: + val (float): The value the function will be evaluated for. - plt.ioff() - fig, ax = plt.subplots() - fig.subplots_adjust(bottom=0.2) - - # Function to plot graph - # according to expression - def visualizeGraph(x_max): - # X value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.arange( - params[0], float(x_max), (params[-1] - params[0]) / 100.0 - ) - # Y values - y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] - - # Plot line - l, = ax.plot(x_vals, y_vals, label=self.mdl.hypothesis.function) - #l, = ax.plot(t, np.zeros_like(t), lw=2) - l.set_ydata(y_vals) - ax.relim() - ax.autoscale_view() - plt.draw() - - # Adding TextBox to graph - from matplotlib.widgets import TextBox - graphBox = fig.add_axes([0.1, 0.05, 0.8, 0.075]) - txtBox = TextBox(graphBox, "Max x: ") - txtBox.on_submit(visualizeGraph) - txtBox.set_val(str(1.5 * params[-1])) + Returns: + float: The result value. + """ + return self.mdl.hypothesis.function.evaluate(val) + + def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: + """This function converts an Extra-P model coefficient into scientific + notation and returns it as a string. It also shortes the coefficients + to three decimal places. + Args: + coefficient (float): A model coefficient from a Extra-P function. - # Plot scatter - - errors = [ - np.subtract(mean, mins), - np.subtract(maxes, mean), - ] - - #ax.plot(params, measures, "ro", yerr=[min,maxs], label=self.mdl.callpath) - ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) - ax.plot(params, median, "+k", label="median") + Returns: + str: The coefficient in scientific notation. + """ + f = mticker.ScalarFormatter(useMathText=True) + f.set_powerlimits((-3,3)) + x = "{}".format(f.format_data(float(coefficient))) + terms = x.split(" ") + if not terms[0][:1].isnumeric(): + coeff = terms[0][1:] + coeff = "{:.3f}".format(float(coeff)) + new_coeff = "" + new_coeff += "-" + new_coeff += coeff + for i in range(len(terms)): + if i != 0: + new_coeff += terms[i] + return new_coeff + else: + coeff = terms[0] + coeff = "{:.3f}".format(float(coeff)) + new_coeff = "" + new_coeff += coeff + for i in range(len(terms)): + if i != 0: + new_coeff += terms[i] + return new_coeff + + def convert_function_to_scientific_notation(self, model_function) -> str: + """This function converts the created performance model function into a + scientific notation in string format. - ax.set_xlabel(self.parameters) - ax.set_ylabel(self.mdl.metric) - ax.legend(loc=1) + Args: + model_function (Extra-P Model): The Extra-P Model object containing the scaling function. - return plt + Returns: + str: The resulting scientific version of the performance function. + """ + + function_terms = len(model_function.compound_terms) + model_copy = copy.deepcopy(model_function) + model_copy.constant_coefficient = self.convert_coefficient_to_scientific_notation(model_function.constant_coefficient) + for i in range(function_terms): + model_copy.compound_terms[i].coefficient = self.convert_coefficient_to_scientific_notation(model_function.compound_terms[i].coefficient) + scientific_function = str(model_copy) + scientific_function = scientific_function.replace("+-", "-") + scientific_function = scientific_function.replace("+ -", "-") + scientific_function = scientific_function.replace("*", "\cdot") + scientific_function = scientific_function.replace("(", "{") + scientific_function = scientific_function.replace(")", "}") + scientific_function = scientific_function.replace("log2{p}", "\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\log_2(q)") + scientific_function = "$" + scientific_function + "$" + return scientific_function def display_one_parameter_model(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False): - """Display function + show_min_max=False, RSS=False, AR2=False, + show_opt_scaling=False): + """Display function to visualize performance models with one model parameter. - Arguments: - RSS (bool): whether to display Extra-P RSS on the plot + Args: + show_mean (bool, optional): whether to display mean values on the plot. Defaults to False. + show_median (bool, optional): whether to display median values on the plot. Defaults to False. + show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. + RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. + AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + + Raises: + Exception: Raises an exception if the optimal scaling curve can not be plotted for the given model parameter. + + Returns: + fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ - # Sort based on x values + # sort based on x values measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) - # Scatter plot + # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values median = [ms.median for ms in measures_sorted] mean = [ms.mean for ms in measures_sorted] # Y values mins = [ms.minimum for ms in measures_sorted] maxes = [ms.maximum for ms in measures_sorted] - # Line plot - - # X value plotting range. Dynamic based off what the largest/smallest values are + # x value plotting range, dynamic based off what the largest/smallest values are x_vals = np.arange( params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 ) - print("model:",self.mdl.hypothesis.function) - - temp = str(self.simplify_coefficients(self.mdl.hypothesis.function)) - temp = temp.replace("+-", "-") - temp = temp.replace("*", "\cdot") - temp = temp.replace("(", "{") - temp = temp.replace(")", "}") - temp = temp.replace("log2{p}", "\log_2(p)") - temp = "$" + temp + "$" - - - print("new function:",temp) + # create a scientific representation of the created performance model + scientific_function = self.convert_function_to_scientific_notation(self.mdl.hypothesis.function) - # Y values + # compute y values for plotting y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] plt.ioff() fig, ax = plt.subplots() - - # Plot line - ax.plot(x_vals, y_vals, label=temp) - - # Plot scatter - errors = [ - np.subtract(mean, mins), - np.subtract(maxes, mean), - ] - - #ax.plot(params, measures, "ro", yerr=[min,maxs], label=self.mdl.callpath) - ax.errorbar(params, mean, yerr=errors, fmt=".k", label=self.mdl.callpath) - ax.plot(params, median, "+k", label="median") - + if show_opt_scaling == True: + y_vals_opt = [] + if self.parameters[0] == "jobsize": + for _ in range(len(y_vals)): + y_vals_opt.append(y_vals[0]) + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + else: + raise Exception("Plotting the optimal scaling is currently not supported for other parameters.") + + # plot the model + ax.plot(x_vals, y_vals, label=scientific_function, color="blue") + + # plot optional features like min/max + if show_mean == True: + ax.plot(params, mean, color="black", marker='+', label=self.mdl.callpath, linestyle = 'None') + if show_median == True: + ax.plot(params, median, color="black", marker='x', label="median", linestyle = 'None') + if show_min_max == True: + ax.plot(params, mins, color="black", marker='_', label="min", linestyle = 'None') + ax.plot(params, maxes, color="black", marker='_', label="max", linestyle = 'None') + # Draw connecting lines + line_x, line_y = [], [] + for x, min_v, max_v in zip(params, mins, maxes): + line_x.append(x), line_x.append(x) + line_y.append(min_v), line_y.append(max_v) + line_x.append(np.nan), line_y.append(np.nan) + ax.plot(line_x, line_y, color="black") + + # plot axes and titles ax.set_xlabel(self.parameters[0] + " $p$") ax.set_ylabel(self.mdl.metric) - if RSS: + ax.set_title(str(self.mdl.callpath)+"()") + + # plot rss and ar2 values + y_pos_text = max(maxes)-0.1*max(maxes) + rss = "{:.3f}".format(self.mdl.hypothesis.RSS) + ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) + if RSS and not AR2: ax.text( x_vals[0], - max(y_vals + mean), - "RSS = " + str(self.mdl.hypothesis.RSS), + y_pos_text, + "RSS = " + rss, ) + elif AR2 and not RSS: + ax.text( + x_vals[0], + y_pos_text, + "AR2 = " + ar2, + ) + elif RSS and AR2: + ax.text( + x_vals[0], + y_pos_text, + "RSS = " + rss + "\nAR2 = " + ar2, + ) + + # plot legend ax.legend(loc=1) return fig, ax - def draw_legend(self, ax_all, dict_callpath_color): - # draw legend + def draw_legend(self, axis, dict_callpath_color): + """This method draws a legend for 3D plots. + + Args: + axis (_type_): The matplotlib axis of a figure object. + dict_callpath_color (dict): The color/marker dict for the elements displayed in the plot. + """ + handles = list() for key, value in dict_callpath_color.items(): labelName = str(key) @@ -226,59 +276,77 @@ def draw_legend(self, ax_all, dict_callpath_color): markersize=10, label=labelName) handles.append(mark) - ax_all.legend(handles=handles, - loc="upper right", bbox_to_anchor=(2.5, 1)) + axis.legend(handles=handles, + loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False): - """Display function + show_min_max=False, RSS=False, AR2=False, + show_opt_scaling=False): + """Display function to visualize performance models with two model parameters. - Arguments: - RSS (bool): whether to display Extra-P RSS on the plot + Args: + show_mean (bool, optional): whether to display mean values on the plot. Defaults to False. + show_median (bool, optional): whether to display median values on the plot. Defaults to False. + show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. + RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. + AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + + Raises: + Exception: Raises an exception if the optimal scaling curve can not be plotted for the given model parameter. + + Returns: + fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ - #TODO: add parameters to display mean, median, min, max - # optiomal scaling surface - # change plot opacity based on if measurements are displayed - # write function with real math script in legend - - # Sort based on x and y values + # sort based on x and y values measures_sorted = sorted(self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1])) - # Scatter plot + # get x, y value from measurements X_params = [ms.coordinate[0] for ms in measures_sorted] # X values Y_params = [ms.coordinate[1] for ms in measures_sorted] # Y values - # Z values + # get median, mean, min, and max values medians = [ms.median for ms in measures_sorted] means = [ms.mean for ms in measures_sorted] mins = [ms.minimum for ms in measures_sorted] maxes = [ms.maximum for ms in measures_sorted] - # Surface plot - # X value plotting range. Dynamic based off what the largest/smallest values are + # x value plotting range. Dynamic based off what the largest/smallest values are x_vals = np.linspace( start=X_params[0], stop=1.5 * X_params[-1], num=100 ) - # Y value plotting range. Dynamic based off what the largest/smallest values are + # y value plotting range. Dynamic based off what the largest/smallest values are y_vals = np.linspace( start=Y_params[0], stop=1.5 * Y_params[-1], num=100 ) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) + + def opt_scaling_func(x, y): + return (means[0]/100)*y + + z_vals2 = opt_scaling_func(x_vals, y_vals) plt.ioff() fig = plt.figure() ax = fig.gca(projection='3d') + + if show_opt_scaling: + if self.parameters[0] == "jobsize" and self.parameters[1] == "problem_size": + ax.plot_surface(x_vals, y_vals, z_vals2, label="optimal scaling", + rstride=1, cstride=1, antialiased=False, alpha=0.1, color="red") - # plot model as surface plot - ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), - rstride=1, cstride=1, antialiased=False, alpha=0.1) + # plot model as surface plot depending on options given + if show_mean or show_median or show_min_max or show_opt_scaling: + ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), + rstride=1, cstride=1, antialiased=False, alpha=0.1, color="blue") + else: + ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), + rstride=1, cstride=1, antialiased=True, color="blue") - #rstride=1, cstride=1, antialiased=False, alpha=0.1 - # plot the measurement points if options selected if show_median: ax.scatter(X_params, Y_params, medians, c="black", marker="x", label="median") @@ -296,14 +364,18 @@ def display_two_parameter_model(self, show_mean=False, show_median=False, line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") - ax.set_xlabel(self.parameters[0]) - ax.set_ylabel(self.parameters[1]) + # axis labels and title + ax.set_xlabel(self.parameters[0] + " $p$") + ax.set_ylabel(self.parameters[1] + " $q$") ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath)+"()") + # create scientific representation of create performance model + scientific_function = self.convert_function_to_scientific_notation(self.mdl.hypothesis.function) + + # create dict for legend color and markers dict_callpath_color = {} - simple_function = self.simplify_function(self.mdl.hypothesis.function) - dict_callpath_color[str(simple_function)] = ["surface", "blue"] + dict_callpath_color[str(scientific_function)] = ["surface", "blue"] if show_mean: dict_callpath_color["mean"] = ["mean", "black"] if show_median: @@ -311,25 +383,67 @@ def display_two_parameter_model(self, show_mean=False, show_median=False, if show_min_max: dict_callpath_color["min"] = ["min", "black"] dict_callpath_color["max"] = ["max", "black"] + if show_opt_scaling: + dict_callpath_color["optimal scaling"] = ["surface", "red"] + + # plot rss and ar2 values + rss = "{:.3f}".format(self.mdl.hypothesis.RSS) + ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) + if RSS and not AR2: + ax.text2D( + 0, + 0.75, + "RSS = " + rss, + transform=ax.transAxes, + ) + elif AR2 and not RSS: + ax.text2D( + 0, + 0.75, + "AR2 = " + ar2, + transform=ax.transAxes, + ) + elif RSS and AR2: + ax.text2D( + 0, + 0.75, + "RSS = " + rss + "\nAR2 = " + ar2, + transform=ax.transAxes, + ) + # draw the legend self.draw_legend(ax, dict_callpath_color) return fig, ax def display(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False): - """Display function + show_min_max=False, RSS=False, AR2=False, + show_opt_scaling=False): + """General display function for visualizing a performance model. + Calls the specific display function depending on the number of + found model parameters automatically. - Arguments: - RSS (bool): whether to display Extra-P RSS on the plot + Args: + show_mean (bool, optional): whether to display mean values on the plot. Defaults to False. + show_median (bool, optional): whether to display median values on the plot. Defaults to False. + show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. + RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. + AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + + Raises: + Exception: Raises an exception if the user tries to display a model with a number of model parameters that is not supported. + + Returns: + fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ # check number of model parameters if len(self.parameters) == 1: - fig, ax = self.display_one_parameter_model(show_mean, show_median, show_min_max, RSS) + fig, ax = self.display_one_parameter_model(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) elif len(self.parameters) == 2: - fig, ax = self.display_two_parameter_model(show_mean, show_median, show_min_max, RSS) + fig, ax = self.display_two_parameter_model(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) else: raise Exception("Plotting performance models with "+str(len(self.parameters))+" parameters is currently not supported.") @@ -371,9 +485,11 @@ def __init__(self, tht, parameters=None, metrics=None): self.experiment = None - def to_html(self, RSS=False): + def to_html(self, show_mean=False, show_median=False, + show_min_max=False, RSS=False, AR2=False, + show_opt_scaling=False): def model_to_img_html(model_obj): - fig, ax = model_obj.display(RSS) + fig, _ = model_obj.display(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) figfile = BytesIO() fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) @@ -397,6 +513,8 @@ def model_to_img_html(model_obj): frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected + #TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. + # need to find a workaround return self.tht.statsframe.dataframe[ [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) From 6f46a4d2850ab317c372b395b3740b692395016c Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 28 Aug 2023 17:41:44 -0700 Subject: [PATCH 12/57] deleted UPDATES file --- UPDATES.md | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 UPDATES.md diff --git a/UPDATES.md b/UPDATES.md deleted file mode 100644 index f0ca87af..00000000 --- a/UPDATES.md +++ /dev/null @@ -1,4 +0,0 @@ -UPDATES: - -* added a check for .cali file type to reader_dispatch() so no value error is raised when other file types, folders are in the provided dir -* refactored the code in produce_models() so it can deal with measurement repetitions From f9a17aae98889eef86b9d89d797254b02f687b6e Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 10:25:00 -0700 Subject: [PATCH 13/57] formatted code to pep8 standard --- thicket/model_extrap.py | 545 ++++++++++++++++++++++++++++------------ 1 file changed, 384 insertions(+), 161 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 3ffb3cf8..cfeee7b2 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -26,14 +26,15 @@ MODEL_TAG = "_extrap-model" + class ExtrapReaderException(Exception): - """Custom exception class for raising exceptions while reading in data from + """Custom exception class for raising exceptions while reading in data from a pandas type dataframe from a thicket object into the Extra-P experiment object. Args: Exception (Exception): Python base Exception object. """ - + def __init__(self, message: str, profile: int) -> None: """Initialization function for the custom Extra-P reader exception class. @@ -44,7 +45,8 @@ def __init__(self, message: str, profile: int) -> None: super().__init__() self.message = message self.profile = profile - + + class ModelWrapper: """Wrapper for an Extra-P model. @@ -81,7 +83,7 @@ def eval(self, val: float) -> float: float: The result value. """ return self.mdl.hypothesis.function.evaluate(val) - + def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: """This function converts an Extra-P model coefficient into scientific notation and returns it as a string. It also shortes the coefficients @@ -94,7 +96,7 @@ def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: str: The coefficient in scientific notation. """ f = mticker.ScalarFormatter(useMathText=True) - f.set_powerlimits((-3,3)) + f.set_powerlimits((-3, 3)) x = "{}".format(f.format_data(float(coefficient))) terms = x.split(" ") if not terms[0][:1].isnumeric(): @@ -107,7 +109,7 @@ def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: if i != 0: new_coeff += terms[i] return new_coeff - else: + else: coeff = terms[0] coeff = "{:.3f}".format(float(coeff)) new_coeff = "" @@ -127,26 +129,42 @@ def convert_function_to_scientific_notation(self, model_function) -> str: Returns: str: The resulting scientific version of the performance function. """ - + function_terms = len(model_function.compound_terms) model_copy = copy.deepcopy(model_function) - model_copy.constant_coefficient = self.convert_coefficient_to_scientific_notation(model_function.constant_coefficient) + model_copy.constant_coefficient = ( + self.convert_coefficient_to_scientific_notation( + model_function.constant_coefficient + ) + ) for i in range(function_terms): - model_copy.compound_terms[i].coefficient = self.convert_coefficient_to_scientific_notation(model_function.compound_terms[i].coefficient) + model_copy.compound_terms[ + i + ].coefficient = self.convert_coefficient_to_scientific_notation( + model_function.compound_terms[i].coefficient + ) scientific_function = str(model_copy) scientific_function = scientific_function.replace("+-", "-") scientific_function = scientific_function.replace("+ -", "-") scientific_function = scientific_function.replace("*", "\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function - - def display_one_parameter_model(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False, AR2=False, - show_opt_scaling=False): + + def display_one_parameter_model( + self, + show_mean=False, + show_median=False, + show_min_max=False, + RSS=False, + AR2=False, + show_opt_scaling=False, + ): """Display function to visualize performance models with one model parameter. Args: @@ -163,14 +181,15 @@ def display_one_parameter_model(self, show_mean=False, show_median=False, Returns: fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ - + # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values median = [ms.median for ms in measures_sorted] - mean = [ms.mean for ms in measures_sorted] # Y values + mean = [ms.mean for ms in measures_sorted] # Y values mins = [ms.minimum for ms in measures_sorted] maxes = [ms.maximum for ms in measures_sorted] @@ -178,36 +197,59 @@ def display_one_parameter_model(self, show_mean=False, show_median=False, x_vals = np.arange( params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 ) - + # create a scientific representation of the created performance model - scientific_function = self.convert_function_to_scientific_notation(self.mdl.hypothesis.function) + scientific_function = self.convert_function_to_scientific_notation( + self.mdl.hypothesis.function + ) # compute y values for plotting y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] - + plt.ioff() fig, ax = plt.subplots() - + if show_opt_scaling == True: y_vals_opt = [] if self.parameters[0] == "jobsize": for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: - raise Exception("Plotting the optimal scaling is currently not supported for other parameters.") + raise Exception( + "Plotting the optimal scaling is currently not supported for other parameters." + ) # plot the model ax.plot(x_vals, y_vals, label=scientific_function, color="blue") - + # plot optional features like min/max if show_mean == True: - ax.plot(params, mean, color="black", marker='+', label=self.mdl.callpath, linestyle = 'None') + ax.plot( + params, + mean, + color="black", + marker="+", + label=self.mdl.callpath, + linestyle="None", + ) if show_median == True: - ax.plot(params, median, color="black", marker='x', label="median", linestyle = 'None') + ax.plot( + params, + median, + color="black", + marker="x", + label="median", + linestyle="None", + ) if show_min_max == True: - ax.plot(params, mins, color="black", marker='_', label="min", linestyle = 'None') - ax.plot(params, maxes, color="black", marker='_', label="max", linestyle = 'None') + ax.plot( + params, mins, color="black", marker="_", label="min", linestyle="None" + ) + ax.plot( + params, maxes, color="black", marker="_", label="max", linestyle="None" + ) # Draw connecting lines line_x, line_y = [], [] for x, min_v, max_v in zip(params, mins, maxes): @@ -215,14 +257,14 @@ def display_one_parameter_model(self, show_mean=False, show_median=False, line_y.append(min_v), line_y.append(max_v) line_x.append(np.nan), line_y.append(np.nan) ax.plot(line_x, line_y, color="black") - + # plot axes and titles ax.set_xlabel(self.parameters[0] + " $p$") ax.set_ylabel(self.mdl.metric) - ax.set_title(str(self.mdl.callpath)+"()") - - # plot rss and ar2 values - y_pos_text = max(maxes)-0.1*max(maxes) + ax.set_title(str(self.mdl.callpath) + "()") + + # plot rss and ar2 values + y_pos_text = max(maxes) - 0.1 * max(maxes) rss = "{:.3f}".format(self.mdl.hypothesis.RSS) ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) if RSS and not AR2: @@ -243,12 +285,12 @@ def display_one_parameter_model(self, show_mean=False, show_median=False, y_pos_text, "RSS = " + rss + "\nAR2 = " + ar2, ) - + # plot legend ax.legend(loc=1) return fig, ax - + def draw_legend(self, axis, dict_callpath_color): """This method draws a legend for 3D plots. @@ -256,7 +298,7 @@ def draw_legend(self, axis, dict_callpath_color): axis (_type_): The matplotlib axis of a figure object. dict_callpath_color (dict): The color/marker dict for the elements displayed in the plot. """ - + handles = list() for key, value in dict_callpath_color.items(): labelName = str(key) @@ -264,24 +306,51 @@ def draw_legend(self, axis, dict_callpath_color): patch = mpatches.Patch(color=value[1], label=labelName) handles.append(patch) elif value[0] == "mean": - mark = mlines.Line2D([], [], color=value[1], marker='+', linestyle='None', - markersize=10, label=labelName) + mark = mlines.Line2D( + [], + [], + color=value[1], + marker="+", + linestyle="None", + markersize=10, + label=labelName, + ) handles.append(mark) elif value[0] == "median": - mark = mlines.Line2D([], [], color=value[1], marker='x', linestyle='None', - markersize=10, label=labelName) + mark = mlines.Line2D( + [], + [], + color=value[1], + marker="x", + linestyle="None", + markersize=10, + label=labelName, + ) handles.append(mark) elif value[0] == "min" or value[0] == "max": - mark = mlines.Line2D([], [], color=value[1], marker='_', linestyle='None', - markersize=10, label=labelName) + mark = mlines.Line2D( + [], + [], + color=value[1], + marker="_", + linestyle="None", + markersize=10, + label=labelName, + ) handles.append(mark) - - axis.legend(handles=handles, - loc="center right", bbox_to_anchor=(2.75, 0.5)) - - def display_two_parameter_model(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False, AR2=False, - show_opt_scaling=False): + + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(2.75, 0.5)) + + def display_two_parameter_model( + self, + show_mean=False, + show_median=False, + show_min_max=False, + RSS=False, + AR2=False, + show_opt_scaling=False, + ): """Display function to visualize performance models with two model parameters. Args: @@ -298,14 +367,17 @@ def display_two_parameter_model(self, show_mean=False, show_median=False, Returns: fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ - + # sort based on x and y values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1])) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) + ) # get x, y value from measurements X_params = [ms.coordinate[0] for ms in measures_sorted] # X values Y_params = [ms.coordinate[1] for ms in measures_sorted] # Y values - + # get median, mean, min, and max values medians = [ms.median for ms in measures_sorted] means = [ms.mean for ms in measures_sorted] @@ -314,65 +386,97 @@ def display_two_parameter_model(self, show_mean=False, show_median=False, # x value plotting range. Dynamic based off what the largest/smallest values are x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100 - ) + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100 - ) + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) - + def opt_scaling_func(x, y): - return (means[0]/100)*y + return (means[0] / 100) * y z_vals2 = opt_scaling_func(x_vals, y_vals) - + plt.ioff() - + fig = plt.figure() - ax = fig.gca(projection='3d') - + ax = fig.gca(projection="3d") + if show_opt_scaling: if self.parameters[0] == "jobsize" and self.parameters[1] == "problem_size": - ax.plot_surface(x_vals, y_vals, z_vals2, label="optimal scaling", - rstride=1, cstride=1, antialiased=False, alpha=0.1, color="red") - - # plot model as surface plot depending on options given + ax.plot_surface( + x_vals, + y_vals, + z_vals2, + label="optimal scaling", + rstride=1, + cstride=1, + antialiased=False, + alpha=0.1, + color="red", + ) + + # plot model as surface plot depending on options given if show_mean or show_median or show_min_max or show_opt_scaling: - ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), - rstride=1, cstride=1, antialiased=False, alpha=0.1, color="blue") + ax.plot_surface( + x_vals, + y_vals, + z_vals, + label=str(self.mdl.hypothesis.function), + rstride=1, + cstride=1, + antialiased=False, + alpha=0.1, + color="blue", + ) else: - ax.plot_surface(x_vals, y_vals, z_vals, label=str(self.mdl.hypothesis.function), - rstride=1, cstride=1, antialiased=True, color="blue") - + ax.plot_surface( + x_vals, + y_vals, + z_vals, + label=str(self.mdl.hypothesis.function), + rstride=1, + cstride=1, + antialiased=True, + color="blue", + ) + # plot the measurement points if options selected if show_median: - ax.scatter(X_params, Y_params, medians, c="black", marker="x", label="median") + ax.scatter( + X_params, Y_params, medians, c="black", marker="x", label="median" + ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") - + # axis labels and title ax.set_xlabel(self.parameters[0] + " $p$") ax.set_ylabel(self.parameters[1] + " $q$") ax.set_zlabel(self.mdl.metric) - ax.set_title(str(self.mdl.callpath)+"()") - + ax.set_title(str(self.mdl.callpath) + "()") + # create scientific representation of create performance model - scientific_function = self.convert_function_to_scientific_notation(self.mdl.hypothesis.function) - + scientific_function = self.convert_function_to_scientific_notation( + self.mdl.hypothesis.function + ) + # create dict for legend color and markers dict_callpath_color = {} dict_callpath_color[str(scientific_function)] = ["surface", "blue"] @@ -385,8 +489,8 @@ def opt_scaling_func(x, y): dict_callpath_color["max"] = ["max", "black"] if show_opt_scaling: dict_callpath_color["optimal scaling"] = ["surface", "red"] - - # plot rss and ar2 values + + # plot rss and ar2 values rss = "{:.3f}".format(self.mdl.hypothesis.RSS) ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) if RSS and not AR2: @@ -410,17 +514,23 @@ def opt_scaling_func(x, y): "RSS = " + rss + "\nAR2 = " + ar2, transform=ax.transAxes, ) - + # draw the legend self.draw_legend(ax, dict_callpath_color) - + return fig, ax - def display(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False, AR2=False, - show_opt_scaling=False): + def display( + self, + show_mean=False, + show_median=False, + show_min_max=False, + RSS=False, + AR2=False, + show_opt_scaling=False, + ): """General display function for visualizing a performance model. - Calls the specific display function depending on the number of + Calls the specific display function depending on the number of found model parameters automatically. Args: @@ -437,19 +547,28 @@ def display(self, show_mean=False, show_median=False, Returns: fig, ax: The matplotlib figure and axis objects, so the user can display and manipulate the plot. """ - + # check number of model parameters if len(self.parameters) == 1: - fig, ax = self.display_one_parameter_model(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) - + fig, ax = self.display_one_parameter_model( + show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + ) + elif len(self.parameters) == 2: - fig, ax = self.display_two_parameter_model(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) - + fig, ax = self.display_two_parameter_model( + show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + ) + else: - raise Exception("Plotting performance models with "+str(len(self.parameters))+" parameters is currently not supported.") - + raise Exception( + "Plotting performance models with " + + str(len(self.parameters)) + + " parameters is currently not supported." + ) + return fig, ax - + + class Modeling: """Produce models for all the metrics across the given graphframes.""" @@ -469,35 +588,44 @@ def __init__(self, tht, parameters=None, metrics=None): metrics (list): A list of String value of the metrics Extra-P will create models for. """ self.tht = tht - + # if there were no parameters provided use the jobsize to create models, # which should always be available if not parameters: self.parameters = ["jobsize"] else: self.parameters = parameters - + # if no metrics have been provided create models for all existing metrics if not metrics: self.metrics = self.tht.exc_metrics + self.tht.inc_metrics else: self.metrics = metrics - + self.experiment = None - def to_html(self, show_mean=False, show_median=False, - show_min_max=False, RSS=False, AR2=False, - show_opt_scaling=False): + def to_html( + self, + show_mean=False, + show_median=False, + show_min_max=False, + RSS=False, + AR2=False, + show_opt_scaling=False, + ): def model_to_img_html(model_obj): - fig, _ = model_obj.display(show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling) + fig, _ = model_obj.display( + show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + ) figfile = BytesIO() fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr - + # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them existing_metrics = [] @@ -509,11 +637,12 @@ def model_to_img_html(model_obj): existing_metrics.append(str(metric)) except KeyError: pass - - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected - #TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. + # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. # need to find a workaround return self.tht.statsframe.dataframe[ [met + MODEL_TAG for met in existing_metrics] @@ -547,62 +676,73 @@ def _add_extrap_statistics(self, node, metric): node, metric + "_RE" + MODEL_TAG ] = hypothesis_fn.RE - def produce_models(self, use_median=True, calc_total_metrics=False, - scaling_parameter="jobsize", add_stats=True): + def produce_models( + self, + use_median=True, + calc_total_metrics=False, + scaling_parameter="jobsize", + add_stats=True, + ): """Produces an Extra-P model. Models are generated by calling Extra-P's ModelGenerator. Arguments: use_median (bool): Set how Extra-P aggregates repetitions of the same - measurement configuration. If set to True, Extra-P uses the median for + measurement configuration. If set to True, Extra-P uses the median for model creation, otherwise it uses the mean. (Default=True) - calc_total_metrics (bool): Set calc_total_metrics to True to let Extra-P - internally calculate the total metric values for metrics measured + calc_total_metrics (bool): Set calc_total_metrics to True to let Extra-P + internally calculate the total metric values for metrics measured per MPI rank, e.g., the average runtime/rank. (Default=False) - scaling_parameter (String): Set the scaling parameter for the total metric + scaling_parameter (String): Set the scaling parameter for the total metric calculation. This parameter is only used when calc_total_metrics=True. - One needs to provide either the name of the parameter that models the + One needs to provide either the name of the parameter that models the resource allocation, e.g., the jobsize, or a fixed int value as a String, - when only scaling, e.g., the problem size, and the resource allocation + when only scaling, e.g., the problem size, and the resource allocation is fix. (Default="jobsize") add_stats (bool): Option to add hypothesis function statistics to the aggregated statistics table. (Default=True) """ - + # create an extra-p experiment experiment = Experiment() - + # create the model parameters for parameter in self.parameters: experiment.add_parameter(Parameter(parameter)) - + # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) - + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) + profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: profile_parameter_value_mapping[profile] = [] - + for parameter in self.parameters: current_param_mapping = self.tht.metadata[parameter].to_dict() for key, value in current_param_mapping.items(): profile_parameter_value_mapping[key].append(float(value)) - + # create the measurement coordinates for profile in ensemble_profile_ordering: - if Coordinate(profile_parameter_value_mapping[profile]) not in experiment.coordinates: - experiment.add_coordinate(Coordinate(profile_parameter_value_mapping[profile])) - + if ( + Coordinate(profile_parameter_value_mapping[profile]) + not in experiment.coordinates + ): + experiment.add_coordinate( + Coordinate(profile_parameter_value_mapping[profile]) + ) + # create the callpaths - #NOTE: could add calltree later on, possibly from hatchet data if available + # NOTE: could add calltree later on, possibly from hatchet data if available for node, _ in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) not in experiment.callpaths: experiment.add_callpath(Callpath(node.frame["name"])) - + # create the metrics for metric in self.metrics: experiment.add_metric(Metric(metric)) - + # iteratre over coordinates for coordinate in experiment.coordinates: # iterate over callpaths @@ -613,20 +753,45 @@ def produce_models(self, use_median=True, calc_total_metrics=False, try: values = [] callpath_exists = False - #NOTE: potentially there is a better way to access the dataframes without looping + # NOTE: potentially there is a better way to access the dataframes without looping for node, single_node_df in self.tht.dataframe.groupby(level=0): if Callpath(node.frame["name"]) == callpath: callpath_exists = True coordinate_exists = False - for profile, single_prof_df in single_node_df.groupby(level=1): - if str(callpath) not in single_prof_df["name"].values: - raise ExtrapReaderException("The callpath \'"+str(callpath)+"\' does not exist in the profile \'"+str(profile)+"\'.", profile) - if Coordinate(profile_parameter_value_mapping[profile]) == coordinate: + for profile, single_prof_df in single_node_df.groupby( + level=1 + ): + if ( + str(callpath) + not in single_prof_df["name"].values + ): + raise ExtrapReaderException( + "The callpath '" + + str(callpath) + + "' does not exist in the profile '" + + str(profile) + + "'.", + profile, + ) + if ( + Coordinate( + profile_parameter_value_mapping[profile] + ) + == coordinate + ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: - raise ExtrapReaderException("The metric \'"+str(metric)+"\' does not exist in the profile \'"+str(profile)+"\'.", profile) + raise ExtrapReaderException( + "The metric '" + + str(metric) + + "' does not exist in the profile '" + + str(profile) + + "'.", + profile, + ) if len(value) == 1: # calculate total metric values if calc_total_metrics == True: @@ -635,17 +800,42 @@ def produce_models(self, use_median=True, calc_total_metrics=False, # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists - if scaling_parameter in self.parameters: - parameter_id = [i for i,x in enumerate(experiment.parameters) if x == Parameter(scaling_parameter)][0] - ranks = coordinate.__getitem__(parameter_id) + if ( + scaling_parameter + in self.parameters + ): + parameter_id = [ + i + for i, x in enumerate( + experiment.parameters + ) + if x + == Parameter( + scaling_parameter + ) + ][0] + ranks = ( + coordinate.__getitem__( + parameter_id + ) + ) # if the specified parameter does not exist else: - raise ExtrapReaderException("The specified scaling parameter \'"+str(scaling_parameter)+"\' could not be found in the passed list of model parameters "+str(self.parameters)+".", profile) - values.append(value[0] * ranks) + raise ExtrapReaderException( + "The specified scaling parameter '" + + str(scaling_parameter) + + "' could not be found in the passed list of model parameters " + + str(self.parameters) + + ".", + profile, + ) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -653,30 +843,62 @@ def produce_models(self, use_median=True, calc_total_metrics=False, else: values.append(value[0]) else: - raise ExtrapReaderException("There are no values recorded for the metric \'"+str(metric)+"\' in the profile \'"+str(profile)+"\'.", profile) + raise ExtrapReaderException( + "There are no values recorded for the metric '" + + str(metric) + + "' in the profile '" + + str(profile) + + "'.", + profile, + ) if coordinate_exists == False: - raise ExtrapReaderException("The parameter value combintation \'"+str(coordinate)+"\' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.", profile) + raise ExtrapReaderException( + "The parameter value combintation '" + + str(coordinate) + + "' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.", + profile, + ) if callpath_exists == False: - raise ExtrapReaderException("The node/callpath \'"+str(callpath)+"\' does not exist in any of the profiles.", profile) + raise ExtrapReaderException( + "The node/callpath '" + + str(callpath) + + "' does not exist in any of the profiles.", + profile, + ) except ExtrapReaderException as e: - print("WARNING: Could not create an Extra-P measurement object for: callpath=\'"+str(callpath)+"\', metric=\'"+str(metric)+"\', coordinate=\'"+str(coordinate)+"\' from the profile: "+str(e.profile)+". "+str(e.message)) - + print( + "WARNING: Could not create an Extra-P measurement object for: callpath='" + + str(callpath) + + "', metric='" + + str(metric) + + "', coordinate='" + + str(coordinate) + + "' from the profile: " + + str(e.profile) + + ". " + + str(e.message) + ) + # if there was no data found at all for this config, do not add any measurement to the experiment if len(values) > 0: - experiment.add_measurement(Measurement(coordinate, callpath, metric, values)) - + experiment.add_measurement( + Measurement(coordinate, callpath, metric, values) + ) + # create the calltree based on the callpaths - #NOTE: could pipe actual calltree in here + # NOTE: could pipe actual calltree in here experiment.call_tree = create_call_tree(experiment.callpaths) - + # check the created experiment for its validty io_helper.validate_experiment(experiment) - + # generate models using Extra-P model generator - model_gen = ModelGenerator(experiment, name="Default Model", use_median=use_median) + model_gen = ModelGenerator( + experiment, name="Default Model", use_median=use_median + ) model_gen.model_all() experiment.add_modeler(model_gen) - + # add the models, and statistics into the dataframe for callpath in experiment.callpaths: for metric in experiment.metrics: @@ -686,15 +908,15 @@ def produce_models(self, use_median=True, calc_total_metrics=False, # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them try: - self.tht.statsframe.dataframe.at[node, str(metric) + MODEL_TAG] = ModelWrapper( - model_gen.models[mkey], self.parameters - ) + self.tht.statsframe.dataframe.at[ + node, str(metric) + MODEL_TAG + ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: self._add_extrap_statistics(node, str(metric)) except Exception: pass - + self.experiment = experiment def _componentize_function(model_object): @@ -716,7 +938,8 @@ def _componentize_function(model_object): # Terms of form "coefficient * variables" for term in fnc.compound_terms: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient From b72f019ec8253395dacc7579f307f6bd3a617395 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 15:32:04 -0700 Subject: [PATCH 14/57] updated test cases for extra-p code --- thicket/tests/test_model_extrap.py | 40 +++++++++++------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/thicket/tests/test_model_extrap.py b/thicket/tests/test_model_extrap.py index da4d4267..35516369 100644 --- a/thicket/tests/test_model_extrap.py +++ b/thicket/tests/test_model_extrap.py @@ -22,26 +22,18 @@ def test_model_extrap(mpi_scaling_cali): # Model created using metadata column mdl = Modeling( t_ens, - "jobsize", - chosen_metrics=[ + parameters=["jobsize"], + metrics=[ "Avg time/rank", ], ) mdl.produce_models() # Model created using manually-input core counts for each file - core_list = { - mpi_scaling_cali[0]: 27, - mpi_scaling_cali[1]: 64, - mpi_scaling_cali[2]: 125, - mpi_scaling_cali[3]: 216, - mpi_scaling_cali[4]: 343, - } mdl2 = Modeling( t_ens, - "cores", - core_list, - chosen_metrics=[ + parameters=["mpi.world.size"], + metrics=[ "Avg time/rank", ], ) @@ -67,32 +59,30 @@ def test_componentize_functions(mpi_scaling_cali): mdl = Modeling( t_ens, - "jobsize", - chosen_metrics=[ + parameters=["jobsize"], + metrics=[ "Avg time/rank", "Max time/rank", ], ) - mdl.produce_models(add_stats=False) + mdl.produce_models(add_stats=False, use_median=False) mdl.componentize_statsframe() xp_comp_df = t_ens.statsframe.dataframe # Check shape - assert xp_comp_df.shape == (45, 22) + assert xp_comp_df.shape == (45, 15) # Check values epsilon = 1e-10 # Account for rounding/approximation - val = xp_comp_df[("Avg time/rank_extrap-model", "c")].iloc[0] - assert abs(val - 1.91978782561084e-05) < epsilon + val = xp_comp_df[("Avg time/rank_extrap-model", + "p^(4/3) * log2(p)^(1)")].iloc[5] + assert abs(val - 7.635268e-09) < epsilon - val = xp_comp_df[("Avg time/rank_extrap-model", "c")].iloc[10] - assert abs(val - -0.003861532835811386) < epsilon + val = xp_comp_df[("Avg time/rank_extrap-model", "log2(p)^(1)")].iloc[10] + assert abs(val - 0.004877826563263911) < epsilon - val = xp_comp_df[("Avg time/rank_extrap-model", "p^(9/4)")].iloc[0] - assert abs(val - 9.088016797416257e-09) < epsilon - - val = xp_comp_df[("Avg time/rank_extrap-model", "p^(4/3) * log2(p)^(1)")].iloc[5] - assert abs(val - 7.635268055673417e-09) < epsilon + val = xp_comp_df[("Max time/rank_extrap-model", "c")].iloc[0] + assert abs(val - 8.3074767) < epsilon From a4b8f7e643fb718f1c94ede51cfda4b13563f524 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 15:45:07 -0700 Subject: [PATCH 15/57] added optional parameter for display function to specify scaling curve --- thicket/model_extrap.py | 141 ++++++++++++++++++++++++++++------------ 1 file changed, 98 insertions(+), 43 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index cfeee7b2..e2bd2e02 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -146,13 +146,13 @@ def convert_function_to_scientific_notation(self, model_function) -> str: scientific_function = str(model_copy) scientific_function = scientific_function.replace("+-", "-") scientific_function = scientific_function.replace("+ -", "-") - scientific_function = scientific_function.replace("*", "\cdot") + scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") scientific_function = scientific_function.replace( - "log2{p}", "\log_2(p)") + "log2{p}", "\\log_2(p)") scientific_function = scientific_function.replace( - "log2{q}", "\log_2(q)") + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -164,6 +164,7 @@ def display_one_parameter_model( RSS=False, AR2=False, show_opt_scaling=False, + opt_scaling_func=None, ): """Display function to visualize performance models with one model parameter. @@ -174,6 +175,7 @@ def display_one_parameter_model( RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. Raises: Exception: Raises an exception if the optimal scaling curve can not be plotted for the given model parameter. @@ -209,32 +211,20 @@ def display_one_parameter_model( plt.ioff() fig, ax = plt.subplots() - if show_opt_scaling == True: - y_vals_opt = [] - if self.parameters[0] == "jobsize": - for _ in range(len(y_vals)): - y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") - else: - raise Exception( - "Plotting the optimal scaling is currently not supported for other parameters." - ) - # plot the model ax.plot(x_vals, y_vals, label=scientific_function, color="blue") # plot optional features like min/max - if show_mean == True: + if show_mean is True: ax.plot( params, mean, color="black", marker="+", - label=self.mdl.callpath, + label="mean", linestyle="None", ) - if show_median == True: + if show_median is True: ax.plot( params, median, @@ -243,7 +233,7 @@ def display_one_parameter_model( label="median", linestyle="None", ) - if show_min_max == True: + if show_min_max is True: ax.plot( params, mins, color="black", marker="_", label="min", linestyle="None" ) @@ -258,6 +248,32 @@ def display_one_parameter_model( line_x.append(np.nan), line_y.append(np.nan) ax.plot(line_x, line_y, color="black") + if show_opt_scaling is True: + # if the user provides a custom function + if opt_scaling_func is not None: + y_vals_opt = [] + try: + # needs to be p, because the diest model parameter chosen by extra-p is p + for p in x_vals: + from math import log2 + y_vals_opt.append(eval(opt_scaling_func)) + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") + except Exception as e: + print("WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: "+str(e)) + # otherwise try to figure out the optimal scaling curve automatically + else: + if self.parameters[0] == "jobsize": + y_vals_opt = [] + for _ in range(len(y_vals)): + y_vals_opt.append(y_vals[0]) + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") + else: + raise Exception( + "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." + ) + # plot axes and titles ax.set_xlabel(self.parameters[0] + " $p$") ax.set_ylabel(self.mdl.metric) @@ -277,13 +293,13 @@ def display_one_parameter_model( ax.text( x_vals[0], y_pos_text, - "AR2 = " + ar2, + "AR\u00b2 = " + ar2, ) elif RSS and AR2: ax.text( x_vals[0], y_pos_text, - "RSS = " + rss + "\nAR2 = " + ar2, + "RSS = " + rss + "\nAR\u00b2 = " + ar2, ) # plot legend @@ -350,6 +366,7 @@ def display_two_parameter_model( RSS=False, AR2=False, show_opt_scaling=False, + opt_scaling_func=None, ): """Display function to visualize performance models with two model parameters. @@ -360,6 +377,7 @@ def display_two_parameter_model( RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. Raises: Exception: Raises an exception if the optimal scaling curve can not be plotted for the given model parameter. @@ -394,10 +412,14 @@ def display_two_parameter_model( x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) - def opt_scaling_func(x, y): + # opt. scaling function used for auto. detection + def opt_scaling_func_auto(x, y): return (means[0] / 100) * y - z_vals2 = opt_scaling_func(x_vals, y_vals) + # opt. scaling function used for use defined inputs + def opt_scaling_func_user(p, q): + from numpy import log2 + return eval(opt_scaling_func) plt.ioff() @@ -405,18 +427,46 @@ def opt_scaling_func(x, y): ax = fig.gca(projection="3d") if show_opt_scaling: - if self.parameters[0] == "jobsize" and self.parameters[1] == "problem_size": - ax.plot_surface( - x_vals, - y_vals, - z_vals2, - label="optimal scaling", - rstride=1, - cstride=1, - antialiased=False, - alpha=0.1, - color="red", - ) + # if the user provides a custom scaling function + if opt_scaling_func is not None: + z_vals_opt = [] + try: + # needs to be p,q, because these are the model parameter chosen by extra-p first + # for p, q in x_vals, y_vals: + # z_vals_opt.append(eval(opt_scaling_func)) + z_vals_opt = opt_scaling_func_user(x_vals, y_vals) + ax.plot_surface( + x_vals, + y_vals, + z_vals_opt, + label="optimal scaling", + rstride=1, + cstride=1, + antialiased=False, + alpha=0.1, + color="red", + ) + except Exception as e: + print("WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: "+str(e)) + # otherwise try to figure out the optimal scaling curve automatically + else: + if self.parameters[0] == "jobsize" and self.parameters[1] == "problem_size": + z_vals_opt = opt_scaling_func_auto(x_vals, y_vals) + ax.plot_surface( + x_vals, + y_vals, + z_vals_opt, + label="optimal scaling", + rstride=1, + cstride=1, + antialiased=False, + alpha=0.1, + color="red", + ) + else: + raise Exception( + "Plotting the optimal scaling automatically is currently not supported for the chosen parameters." + ) # plot model as surface plot depending on options given if show_mean or show_median or show_min_max or show_opt_scaling: @@ -504,14 +554,14 @@ def opt_scaling_func(x, y): ax.text2D( 0, 0.75, - "AR2 = " + ar2, + "AR\u00b2 = " + ar2, transform=ax.transAxes, ) elif RSS and AR2: ax.text2D( 0, 0.75, - "RSS = " + rss + "\nAR2 = " + ar2, + "RSS = " + rss + "\nAR\u00b2 = " + ar2, transform=ax.transAxes, ) @@ -528,6 +578,7 @@ def display( RSS=False, AR2=False, show_opt_scaling=False, + opt_scaling_func=None, ): """General display function for visualizing a performance model. Calls the specific display function depending on the number of @@ -540,6 +591,7 @@ def display( RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. Raises: Exception: Raises an exception if the user tries to display a model with a number of model parameters that is not supported. @@ -551,12 +603,14 @@ def display( # check number of model parameters if len(self.parameters) == 1: fig, ax = self.display_one_parameter_model( - show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + show_mean, show_median, show_min_max, + RSS, AR2, show_opt_scaling, opt_scaling_func ) elif len(self.parameters) == 2: fig, ax = self.display_two_parameter_model( - show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + show_mean, show_median, show_min_max, + RSS, AR2, show_opt_scaling, opt_scaling_func ) else: @@ -612,10 +666,11 @@ def to_html( RSS=False, AR2=False, show_opt_scaling=False, + opt_scaling_func=None, ): def model_to_img_html(model_obj): fig, _ = model_obj.display( - show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling + show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling, opt_scaling_func ) figfile = BytesIO() fig.savefig(figfile, format="jpg", transparent=False) @@ -794,7 +849,7 @@ def produce_models( ) if len(value) == 1: # calculate total metric values - if calc_total_metrics == True: + if calc_total_metrics is True: # convert only data for metrics that are measured per rank if "/rank" in str(metric): # read out scaling parameter for total metric value calculation @@ -851,14 +906,14 @@ def produce_models( + "'.", profile, ) - if coordinate_exists == False: + if coordinate_exists is False: raise ExtrapReaderException( "The parameter value combintation '" + str(coordinate) + "' could not be matched to any of the profiles. This could indicate missing metadata values for one or more of the parameters specified for modeling.", profile, ) - if callpath_exists == False: + if callpath_exists is False: raise ExtrapReaderException( "The node/callpath '" + str(callpath) From de874bf7b7e24b0aeaf74e26079a397e53f15190 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 15:46:44 -0700 Subject: [PATCH 16/57] formatted code to pep8 standard --- thicket/model_extrap.py | 103 +++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 48 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index e2bd2e02..27d49813 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -149,10 +149,8 @@ def convert_function_to_scientific_notation(self, model_function) -> str: scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -185,8 +183,7 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -256,19 +253,21 @@ def display_one_parameter_model( # needs to be p, because the diest model parameter chosen by extra-p is p for p in x_vals: from math import log2 + y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: - print("WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: "+str(e)) + print( + "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " + + str(e) + ) # otherwise try to figure out the optimal scaling curve automatically else: if self.parameters[0] == "jobsize": y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -355,8 +354,7 @@ def draw_legend(self, axis, dict_callpath_color): ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -388,8 +386,7 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -403,11 +400,9 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -419,6 +414,7 @@ def opt_scaling_func_auto(x, y): # opt. scaling function used for use defined inputs def opt_scaling_func_user(p, q): from numpy import log2 + return eval(opt_scaling_func) plt.ioff() @@ -447,10 +443,16 @@ def opt_scaling_func_user(p, q): color="red", ) except Exception as e: - print("WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: "+str(e)) + print( + "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " + + str(e) + ) # otherwise try to figure out the optimal scaling curve automatically else: - if self.parameters[0] == "jobsize" and self.parameters[1] == "problem_size": + if ( + self.parameters[0] == "jobsize" + and self.parameters[1] == "problem_size" + ): z_vals_opt = opt_scaling_func_auto(x_vals, y_vals) ax.plot_surface( x_vals, @@ -499,21 +501,17 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -603,14 +601,24 @@ def display( # check number of model parameters if len(self.parameters) == 1: fig, ax = self.display_one_parameter_model( - show_mean, show_median, show_min_max, - RSS, AR2, show_opt_scaling, opt_scaling_func + show_mean, + show_median, + show_min_max, + RSS, + AR2, + show_opt_scaling, + opt_scaling_func, ) elif len(self.parameters) == 2: fig, ax = self.display_two_parameter_model( - show_mean, show_median, show_min_max, - RSS, AR2, show_opt_scaling, opt_scaling_func + show_mean, + show_median, + show_min_max, + RSS, + AR2, + show_opt_scaling, + opt_scaling_func, ) else: @@ -670,14 +678,19 @@ def to_html( ): def model_to_img_html(model_obj): fig, _ = model_obj.display( - show_mean, show_median, show_min_max, RSS, AR2, show_opt_scaling, opt_scaling_func + show_mean, + show_median, + show_min_max, + RSS, + AR2, + show_opt_scaling, + opt_scaling_func, ) figfile = BytesIO() fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -693,8 +706,7 @@ def model_to_img_html(model_obj): except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -766,8 +778,7 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -836,8 +847,7 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -855,8 +865,7 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -889,8 +898,7 @@ def produce_models( + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -993,8 +1001,7 @@ def _componentize_function(model_object): # Terms of form "coefficient * variables" for term in fnc.compound_terms: # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + variable_column = " * ".join(t.to_string() for t in term.simple_terms) term_dict[variable_column] = term.coefficient From 9c121163e4cda7048fca9c39d92d958dbe2cce7f Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 15:49:48 -0700 Subject: [PATCH 17/57] formatted code to pep8 standard --- thicket/tests/test_model_extrap.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/thicket/tests/test_model_extrap.py b/thicket/tests/test_model_extrap.py index 35516369..45a12685 100644 --- a/thicket/tests/test_model_extrap.py +++ b/thicket/tests/test_model_extrap.py @@ -77,8 +77,7 @@ def test_componentize_functions(mpi_scaling_cali): # Check values epsilon = 1e-10 # Account for rounding/approximation - val = xp_comp_df[("Avg time/rank_extrap-model", - "p^(4/3) * log2(p)^(1)")].iloc[5] + val = xp_comp_df[("Avg time/rank_extrap-model", "p^(4/3) * log2(p)^(1)")].iloc[5] assert abs(val - 7.635268e-09) < epsilon val = xp_comp_df[("Avg time/rank_extrap-model", "log2(p)^(1)")].iloc[10] From 3f1fb4ba4111696181bd467692ab0bb9a56db210 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 29 Aug 2023 15:56:16 -0700 Subject: [PATCH 18/57] fixed F401 '*.log2' imported but unused --- thicket/model_extrap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 27d49813..720fb5c3 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -252,7 +252,7 @@ def display_one_parameter_model( try: # needs to be p, because the diest model parameter chosen by extra-p is p for p in x_vals: - from math import log2 + from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") @@ -413,7 +413,7 @@ def opt_scaling_func_auto(x, y): # opt. scaling function used for use defined inputs def opt_scaling_func_user(p, q): - from numpy import log2 + from numpy import log2 # noqa: F401 return eval(opt_scaling_func) From 51bd6739beaa705243586a7b0ee25912b5c2e986 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 31 Aug 2023 15:44:50 -0700 Subject: [PATCH 19/57] added complexity analysis for extra-p models with one parameter --- thicket/__init__.py | 2 +- thicket/model_extrap.py | 179 +++++++++++++++++++++++++++++++++++----- 2 files changed, 160 insertions(+), 21 deletions(-) diff --git a/thicket/__init__.py b/thicket/__init__.py index a5559da6..19f8e2f7 100644 --- a/thicket/__init__.py +++ b/thicket/__init__.py @@ -6,7 +6,7 @@ # make flake8 unused names in this file. # flake8: noqa: F401 -from .model_extrap import Modeling +# from .model_extrap import Modeling from .thicket import Thicket from .thicket import InvalidFilter from .thicket import EmptyMetadataTable diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 720fb5c3..55eb41cb 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -149,8 +149,10 @@ def convert_function_to_scientific_notation(self, model_function) -> str: scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -183,7 +185,8 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -255,7 +258,8 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -267,7 +271,8 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -354,7 +359,8 @@ def draw_legend(self, axis, dict_callpath_color): ) handles.append(mark) - axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -386,7 +392,8 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -400,9 +407,11 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -501,17 +510,21 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -690,7 +703,8 @@ def model_to_img_html(model_obj): fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr @@ -706,7 +720,8 @@ def model_to_img_html(model_obj): except KeyError: pass - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -778,7 +793,8 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -847,7 +863,8 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -865,7 +882,8 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -898,7 +916,8 @@ def produce_models( + ".", profile, ) - values.append(value[0] * ranks) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1001,7 +1020,8 @@ def _componentize_function(model_object): # Terms of form "coefficient * variables" for term in fnc.compound_terms: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient @@ -1059,3 +1079,122 @@ def componentize_statsframe(self, columns=None): # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + def _analyze_complexity(model_object, eval_target, col): + """Componentize one Extra-P modeling object into a dictionary of its parts + + Arguments: + model_object (ModelWrapper): Thicket ModelWrapper Extra-P modeling object + + Returns: + (dict): dictionary of the ModelWrapper's hypothesis function parts + """ + # Dictionary of variables mapped to coefficients + term_dict = {} + # Model object hypothesis function + fnc = model_object.mdl.hypothesis.function + # Constant "c" column + term_dict["c"] = fnc.constant_coefficient + + complexity_class = "" + coefficient = 0 + + return_value = {} + term_values = [] + terms = [] + + if len(fnc.compound_terms) == 0: + # from IPython.display import display, Math + # from sympy import latex + # complexity_class = display(Math('$'+latex("O(1)")+'$')) + complexity_class = "O(1)" + coefficient = fnc.constant_coefficient + return_value[col+"_complexity"] = complexity_class + return_value[col+"_coefficient"] = coefficient + + else: + for term in fnc.compound_terms: + + result = term.evaluate(eval_target) + term_values.append(result) + terms.append(term) + + # Join variables of the same term together + """variable_column = " * ".join(t.to_string() + for t in term.simple_terms) + + term_dict[variable_column] = term.coefficient""" + + max_index = term_values.index(max(term_values)) + + if max(term_values) > fnc.constant_coefficient: + comp = "" + for simple_term in terms[max_index].simple_terms: + if comp == "": + comp += simple_term.to_string() + else: + comp = comp + "*" + simple_term.to_string() + comp = comp.replace("^", "**") + complexity_class = "O("+comp+")" + coefficient = terms[max_index].coefficient + return_value[col+"_complexity"] = complexity_class + return_value[col+"_coefficient"] = coefficient + + else: + complexity_class = "O(1)" + coefficient = fnc.constant_coefficient + return_value[col+"_complexity"] = complexity_class + return_value[col+"_coefficient"] = coefficient + + return return_value + + def complexity_statsframe(self, columns=None, eval_target=None): + + if eval_target is None: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for.") + else: + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in self.tht.statsframe.dataframe + if isinstance(self.tht.statsframe.dataframe[col][0], ModelWrapper) + ] + + # Error checking + for c in columns: + if c not in self.tht.statsframe.dataframe.columns: + raise ValueError( + "column " + c + " is not in the aggregated statistics table." + ) + elif not isinstance(self.tht.statsframe.dataframe[c][0], ModelWrapper): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) + + # Process each column + all_dfs = [] + for col in columns: + + # Get list of components for this column + components = [ + Modeling._analyze_complexity(model_obj, eval_target, col) + for model_obj in self.tht.statsframe.dataframe[col] + ] + + # Component dataframe + comp_df = pd.DataFrame( + data=components, index=self.tht.statsframe.dataframe.index + ) + + # Add column name as index level + # comp_df.columns = "[col], comp_df.columns.to_list()" + all_dfs.append(comp_df) + + # Concatenate dataframes horizontally + all_dfs.insert(0, self.tht.statsframe.dataframe) + self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) From e843a94d0cf03beb48756d9d8ba644c9690232bf Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 1 Sep 2023 11:42:25 -0700 Subject: [PATCH 20/57] added growth trend ranking to analyze complexity function --- thicket/model_extrap.py | 187 ++++++++++++++++++++++++---------------- 1 file changed, 113 insertions(+), 74 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 55eb41cb..4fb0e05a 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -13,6 +13,7 @@ import matplotlib.ticker as mticker import numpy as np import pandas as pd +from scipy.stats import rankdata from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator @@ -149,10 +150,8 @@ def convert_function_to_scientific_notation(self, model_function) -> str: scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -185,8 +184,7 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -258,8 +256,7 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -271,8 +268,7 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -359,8 +355,7 @@ def draw_legend(self, axis, dict_callpath_color): ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -392,8 +387,7 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -407,11 +401,9 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -510,21 +502,17 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -703,8 +691,7 @@ def model_to_img_html(model_obj): fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -720,8 +707,7 @@ def model_to_img_html(model_obj): except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -793,8 +779,7 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -863,8 +848,7 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -882,8 +866,7 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -916,8 +899,7 @@ def produce_models( + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1001,6 +983,7 @@ def produce_models( self.experiment = experiment + # TODO: add multi parameter support def _componentize_function(model_object): """Componentize one Extra-P modeling object into a dictionary of its parts @@ -1020,13 +1003,13 @@ def _componentize_function(model_object): # Terms of form "coefficient * variables" for term in fnc.compound_terms: # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + variable_column = " * ".join(t.to_string() for t in term.simple_terms) term_dict[variable_column] = term.coefficient return term_dict + # TODO: add multi parameter support def componentize_statsframe(self, columns=None): """Componentize multiple Extra-P modeling objects in the aggregated statistics table @@ -1080,22 +1063,10 @@ def componentize_statsframe(self, columns=None): all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + # TODO: add multi parameter support def _analyze_complexity(model_object, eval_target, col): - """Componentize one Extra-P modeling object into a dictionary of its parts - - Arguments: - model_object (ModelWrapper): Thicket ModelWrapper Extra-P modeling object - - Returns: - (dict): dictionary of the ModelWrapper's hypothesis function parts - """ - # Dictionary of variables mapped to coefficients - term_dict = {} # Model object hypothesis function fnc = model_object.mdl.hypothesis.function - # Constant "c" column - term_dict["c"] = fnc.constant_coefficient - complexity_class = "" coefficient = 0 @@ -1104,27 +1075,17 @@ def _analyze_complexity(model_object, eval_target, col): terms = [] if len(fnc.compound_terms) == 0: - # from IPython.display import display, Math - # from sympy import latex - # complexity_class = display(Math('$'+latex("O(1)")+'$')) complexity_class = "O(1)" coefficient = fnc.constant_coefficient - return_value[col+"_complexity"] = complexity_class - return_value[col+"_coefficient"] = coefficient + return_value[col + "_complexity"] = complexity_class + return_value[col + "_coefficient"] = coefficient else: for term in fnc.compound_terms: - result = term.evaluate(eval_target) term_values.append(result) terms.append(term) - # Join variables of the same term together - """variable_column = " * ".join(t.to_string() - for t in term.simple_terms) - - term_dict[variable_column] = term.coefficient""" - max_index = term_values.index(max(term_values)) if max(term_values) > fnc.constant_coefficient: @@ -1135,26 +1096,26 @@ def _analyze_complexity(model_object, eval_target, col): else: comp = comp + "*" + simple_term.to_string() comp = comp.replace("^", "**") - complexity_class = "O("+comp+")" + complexity_class = "O(" + comp + ")" coefficient = terms[max_index].coefficient - return_value[col+"_complexity"] = complexity_class - return_value[col+"_coefficient"] = coefficient + return_value[col + "_complexity"] = complexity_class + return_value[col + "_coefficient"] = coefficient else: complexity_class = "O(1)" coefficient = fnc.constant_coefficient - return_value[col+"_complexity"] = complexity_class - return_value[col+"_coefficient"] = coefficient + return_value[col + "_complexity"] = complexity_class + return_value[col + "_coefficient"] = coefficient return return_value + # TODO: add multi parameter support def complexity_statsframe(self, columns=None, eval_target=None): - if eval_target is None: raise Exception( - "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for.") + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) else: - # Use all Extra-P columns if columns is None: columns = [ @@ -1179,7 +1140,6 @@ def complexity_statsframe(self, columns=None, eval_target=None): # Process each column all_dfs = [] for col in columns: - # Get list of components for this column components = [ Modeling._analyze_complexity(model_obj, eval_target, col) @@ -1198,3 +1158,82 @@ def complexity_statsframe(self, columns=None, eval_target=None): # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + # Add callpath ranking to the dataframe + all_dfs = [] + for col in columns: + total_metric_value = 0 + metric_values = [] + for model_obj in self.tht.statsframe.dataframe[col]: + metric_value = model_obj.mdl.hypothesis.function.evaluate( + eval_target + ) + total_metric_value += metric_value + metric_values.append(metric_value) + percentages = [] + for value in metric_values: + percentage = value / (total_metric_value / 100) + if percentage < 0: + percentages.append(0) + else: + percentages.append(percentage) + reverse_ranking = len(percentages) - rankdata( + percentages, method="ordinal" + ).astype(int) + for i in range(len(reverse_ranking)): + reverse_ranking[i] += 1 + ranking_list = [] + for i in range(len(reverse_ranking)): + ranking_dict = {} + ranking_dict[col + "_growth_rank"] = reverse_ranking[i] + ranking_list.append(ranking_dict) + + # Component dataframe + comp_df = pd.DataFrame( + data=ranking_list, index=self.tht.statsframe.dataframe.index + ) + + # Add column name as index level + # comp_df.columns = "[col], comp_df.columns.to_list()" + all_dfs.append(comp_df) + + # Concatenate dataframes horizontally + all_dfs.insert(0, self.tht.statsframe.dataframe) + self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + def phase_statsframe(self, columns=None, eval_target=None): + if eval_target is None: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) + else: + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in self.tht.statsframe.dataframe + if isinstance(self.tht.statsframe.dataframe[col][0], ModelWrapper) + ] + + print("columns:", columns) + + callpaths = self.tht.statsframe.dataframe["name"].values.tolist() + print("callpaths:", callpaths) + + communication = {} + computation = {} + for i in range(len(callpaths)): + if "MPI" in callpaths[i]: + communication[callpaths[i]] = i + else: + computation[callpaths[i]] = i + + print("communication:", communication) + print("computation:", computation) + + # TODO: aggregate the functions for both types and come up with one that describes all of them + + # TODO: how to return the data back, because pandas can't aggregate functions with each other, + # so there is no point in introducing an extra column type(MPI,comp) to group by that... + + return self.tht.statsframe.dataframe From 25a6b86cf92a24ee284cc590a8af9b771cf25d82 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 6 Sep 2023 14:21:19 -0700 Subject: [PATCH 21/57] added type checking, hints --- thicket/model_extrap.py | 231 ++++++++++++++++++++++++++++------------ 1 file changed, 160 insertions(+), 71 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 4fb0e05a..da072a15 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -8,13 +8,18 @@ from io import BytesIO import matplotlib.pyplot as plt +from matplotlib.figure import Figure +from matplotlib.axes import Axes from matplotlib import patches as mpatches import matplotlib.lines as mlines import matplotlib.ticker as mticker import numpy as np import pandas as pd +from pandas import DataFrame from scipy.stats import rankdata +from hatchet import node + from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator from extrap.entities.experiment import Experiment @@ -24,6 +29,8 @@ from extrap.entities.metric import Metric from extrap.entities.callpath import Callpath from extrap.entities.coordinate import Coordinate +from extrap.entities.model import Model +from extrap.entities.functions import Function MODEL_TAG = "_extrap-model" @@ -56,7 +63,7 @@ class ModelWrapper: the model. """ - def __init__(self, mdl, parameters): + def __init__(self, mdl: Model, parameters: list[str]) -> None: """Init function of the ModelWrapper class. Args: @@ -102,7 +109,10 @@ def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: terms = x.split(" ") if not terms[0][:1].isnumeric(): coeff = terms[0][1:] - coeff = "{:.3f}".format(float(coeff)) + try: + coeff = "{:.3f}".format(float(coeff)) + except ValueError: + pass new_coeff = "" new_coeff += "-" new_coeff += coeff @@ -112,7 +122,10 @@ def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: return new_coeff else: coeff = terms[0] - coeff = "{:.3f}".format(float(coeff)) + try: + coeff = "{:.3f}".format(float(coeff)) + except ValueError: + pass new_coeff = "" new_coeff += coeff for i in range(len(terms)): @@ -120,7 +133,7 @@ def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: new_coeff += terms[i] return new_coeff - def convert_function_to_scientific_notation(self, model_function) -> str: + def convert_function_to_scientific_notation(self, model_function: Function) -> str: """This function converts the created performance model function into a scientific notation in string format. @@ -150,21 +163,23 @@ def convert_function_to_scientific_notation(self, model_function) -> str: scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function def display_one_parameter_model( self, - show_mean=False, - show_median=False, - show_min_max=False, - RSS=False, - AR2=False, - show_opt_scaling=False, - opt_scaling_func=None, - ): + show_mean: bool = False, + show_median: bool = False, + show_min_max: bool = False, + RSS: bool = False, + AR2: bool = False, + show_opt_scaling: bool = False, + opt_scaling_func: str = None, + ) -> tuple[Figure, Axes]: """Display function to visualize performance models with one model parameter. Args: @@ -184,7 +199,8 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -256,7 +272,8 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -268,7 +285,8 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -307,7 +325,7 @@ def display_one_parameter_model( return fig, ax - def draw_legend(self, axis, dict_callpath_color): + def draw_legend(self, axis: Axes, dict_callpath_color: dict[str, list[str]]) -> None: """This method draws a legend for 3D plots. Args: @@ -355,18 +373,19 @@ def draw_legend(self, axis, dict_callpath_color): ) handles.append(mark) - axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, - show_mean=False, - show_median=False, - show_min_max=False, - RSS=False, - AR2=False, - show_opt_scaling=False, - opt_scaling_func=None, - ): + show_mean: bool = False, + show_median: bool = False, + show_min_max: bool = False, + RSS: bool = False, + AR2: bool = False, + show_opt_scaling: bool = False, + opt_scaling_func: str = None, + ) -> tuple[Figure, Axes]: """Display function to visualize performance models with two model parameters. Args: @@ -387,7 +406,8 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -401,9 +421,11 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -502,17 +524,21 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -571,14 +597,14 @@ def opt_scaling_func_user(p, q): def display( self, - show_mean=False, - show_median=False, - show_min_max=False, - RSS=False, - AR2=False, - show_opt_scaling=False, - opt_scaling_func=None, - ): + show_mean: bool = False, + show_median: bool = False, + show_min_max: bool = False, + RSS: bool = False, + AR2: bool = False, + show_opt_scaling: bool = False, + opt_scaling_func: str = None, + ) -> tuple[Figure, Axes]: """General display function for visualizing a performance model. Calls the specific display function depending on the number of found model parameters automatically. @@ -635,7 +661,7 @@ def display( class Modeling: """Produce models for all the metrics across the given graphframes.""" - def __init__(self, tht, parameters=None, metrics=None): + def __init__(self, tht, parameters: list[str] = None, metrics: list[str] = None) -> None: """Create a new model object. Adds a model column for each metric for each common frame across all the @@ -669,15 +695,37 @@ def __init__(self, tht, parameters=None, metrics=None): def to_html( self, - show_mean=False, - show_median=False, - show_min_max=False, - RSS=False, - AR2=False, - show_opt_scaling=False, - opt_scaling_func=None, - ): - def model_to_img_html(model_obj): + show_mean: bool = False, + show_median: bool = False, + show_min_max: bool = False, + RSS: bool = False, + AR2: bool = False, + show_opt_scaling: bool = False, + opt_scaling_func: str = None, + ) -> DataFrame: + """Converts the DataFrame into an html version that can be displayed in jupyter notebooks. + + Args: + show_mean (bool, optional): whether to display mean values on the plot. Defaults to False. + show_median (bool, optional): whether to display median values on the plot. Defaults to False. + show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. + RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. + AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. + opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. + + Returns: + DataFrame: A Pandas DataFrame with the added matplotlib plots. + """ + def model_to_img_html(model_obj: Model) -> str: + """Converts the maplotlib plot of a given model into an image html representation. + + Args: + model_obj (Model): The Extra-P Model for which the plot should be converted. + + Returns: + str: The maplotlib plot in a image html format. + """ fig, _ = model_obj.display( show_mean, show_median, @@ -691,7 +739,8 @@ def model_to_img_html(model_obj): fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr @@ -707,7 +756,8 @@ def model_to_img_html(model_obj): except KeyError: pass - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -716,12 +766,12 @@ def model_to_img_html(model_obj): [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) - def _add_extrap_statistics(self, node, metric): + def _add_extrap_statistics(self, node: node, metric: str) -> None: """Insert the Extra-P hypothesis function statistics into the aggregated statistics table. Has to be called after "produce_models". Arguments: - node (hatchet.Node): The node for which statistics should be calculated + node (hatchet.node): The node for which statistics should be calculated metric (str): The metric for which statistics should be calculated """ hypothesis_fn = self.tht.statsframe.dataframe.at[ @@ -746,11 +796,11 @@ def _add_extrap_statistics(self, node, metric): def produce_models( self, - use_median=True, - calc_total_metrics=False, - scaling_parameter="jobsize", - add_stats=True, - ): + use_median: bool = True, + calc_total_metrics: bool = False, + scaling_parameter: str = "jobsize", + add_stats: bool = True, + ) -> None: """Produces an Extra-P model. Models are generated by calling Extra-P's ModelGenerator. @@ -779,7 +829,8 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -848,7 +899,8 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -866,7 +918,8 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -899,7 +952,8 @@ def produce_models( + ".", profile, ) - values.append(value[0] * ranks) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -984,7 +1038,7 @@ def produce_models( self.experiment = experiment # TODO: add multi parameter support - def _componentize_function(model_object): + def _componentize_function(model_object: Model) -> dict[str, float]: """Componentize one Extra-P modeling object into a dictionary of its parts Arguments: @@ -1003,14 +1057,15 @@ def _componentize_function(model_object): # Terms of form "coefficient * variables" for term in fnc.compound_terms: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient return term_dict # TODO: add multi parameter support - def componentize_statsframe(self, columns=None): + def componentize_statsframe(self, columns: list[str] = None) -> None: """Componentize multiple Extra-P modeling objects in the aggregated statistics table @@ -1064,7 +1119,18 @@ def componentize_statsframe(self, columns=None): self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) # TODO: add multi parameter support - def _analyze_complexity(model_object, eval_target, col): + def _analyze_complexity(model_object: Model, eval_target: float, col: str) -> dict[str, str]: + """Analyzes the complexity of a given Extra-P model by evaluating it for a given target scale and column (metric). + + Args: + model_object (Model): The Extra-P Model, which the complexity analysis should be performed for. + eval_target (float): The target scale for the evaluation. + col (str): The column (metric) to evaluate for. + + Returns: + dict[str, str]: A dictionary containing the new column names for the thicket DataFrame (key) and the found complexity class/their coefficients (values). + """ + # Model object hypothesis function fnc = model_object.mdl.hypothesis.function complexity_class = "" @@ -1110,7 +1176,18 @@ def _analyze_complexity(model_object, eval_target, col): return return_value # TODO: add multi parameter support - def complexity_statsframe(self, columns=None, eval_target=None): + def complexity_statsframe(self, columns: list[str] = None, eval_target: float = None) -> None: + """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. + + Args: + columns (list[str], optional): A list of columns (metrics) that should be considered. Defaults to None. + eval_target (float, optional): The target scale the evaluation should be done for. Defaults to None. + + Raises: + Exception: Raises an exception if the target scale is not provided. + ValueError: Raises a ValueError is not in the aggregates statistics table. + TypeError: Raises a TypeError if the column is not of the right type. + """ if eval_target is None: raise Exception( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." @@ -1201,7 +1278,19 @@ def complexity_statsframe(self, columns=None, eval_target=None): all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) - def phase_statsframe(self, columns=None, eval_target=None): + def phase_statsframe(self, columns: list[str] = None, eval_target: float = None) -> DataFrame: + """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. + + Args: + columns (list[str], optional): The list of columns (metrics) that should be considered or the phase analysis. Defaults to None. + eval_target (float, optional): The target evaluation scale for the phase analysis. Defaults to None. + + Raises: + Exception: Raises an exception if the target scale is not provided. + + Returns: + DataFrame: A thicket DataFrame that contains only the phase results. + """ if eval_target is None: raise Exception( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." From ea69fc151600b12d68f78d4c80afaeffbc64b091 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 6 Sep 2023 18:28:37 -0700 Subject: [PATCH 22/57] added multi-parameter support for componentize function --- thicket/model_extrap.py | 139 +++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 52 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index da072a15..cc7f8512 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -34,6 +34,35 @@ MODEL_TAG = "_extrap-model" +DEFAULT_PARAM_NAMES = ( + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", +) + class ExtrapReaderException(Exception): """Custom exception class for raising exceptions while reading in data from @@ -163,10 +192,8 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -199,8 +226,7 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -272,8 +298,7 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -285,8 +310,7 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -325,7 +349,9 @@ def display_one_parameter_model( return fig, ax - def draw_legend(self, axis: Axes, dict_callpath_color: dict[str, list[str]]) -> None: + def draw_legend( + self, axis: Axes, dict_callpath_color: dict[str, list[str]] + ) -> None: """This method draws a legend for 3D plots. Args: @@ -373,8 +399,7 @@ def draw_legend(self, axis: Axes, dict_callpath_color: dict[str, list[str]]) -> ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -406,8 +431,7 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -421,11 +445,9 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -524,21 +546,17 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -661,7 +679,9 @@ def display( class Modeling: """Produce models for all the metrics across the given graphframes.""" - def __init__(self, tht, parameters: list[str] = None, metrics: list[str] = None) -> None: + def __init__( + self, tht, parameters: list[str] = None, metrics: list[str] = None + ) -> None: """Create a new model object. Adds a model column for each metric for each common frame across all the @@ -717,6 +737,7 @@ def to_html( Returns: DataFrame: A Pandas DataFrame with the added matplotlib plots. """ + def model_to_img_html(model_obj: Model) -> str: """Converts the maplotlib plot of a given model into an image html representation. @@ -739,8 +760,7 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -756,8 +776,7 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -829,8 +848,7 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -899,8 +917,7 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -918,8 +935,7 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -952,8 +968,7 @@ def produce_models( + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1037,8 +1052,9 @@ def produce_models( self.experiment = experiment - # TODO: add multi parameter support - def _componentize_function(model_object: Model) -> dict[str, float]: + def _componentize_function( + model_object: Model, parameters: list[str] + ) -> dict[str, float]: """Componentize one Extra-P modeling object into a dictionary of its parts Arguments: @@ -1056,15 +1072,28 @@ def _componentize_function(model_object: Model) -> dict[str, float]: # Terms of form "coefficient * variables" for term in fnc.compound_terms: - # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + if len(parameters) == 1: + # Join variables of the same term together + variable_column = " * ".join(t.to_string() for t in term.simple_terms) - term_dict[variable_column] = term.coefficient + term_dict[variable_column] = term.coefficient + else: + x = term.parameter_term_pairs + term_str = "" + for i in range(len(x)): + # [0] is the x mpterm + # [1] is the term object + term_parameter_str = DEFAULT_PARAM_NAMES[x[i][0]] + y = x[i][1].to_string(parameter=term_parameter_str) + if i == 0: + term_str += y + else: + term_str = term_str + " * " + y + + term_dict[term_str] = term.coefficient return term_dict - # TODO: add multi parameter support def componentize_statsframe(self, columns: list[str] = None) -> None: """Componentize multiple Extra-P modeling objects in the aggregated statistics table @@ -1099,7 +1128,7 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: for col in columns: # Get list of components for this column components = [ - Modeling._componentize_function(model_obj) + Modeling._componentize_function(model_obj, self.parameters) for model_obj in self.tht.statsframe.dataframe[col] ] @@ -1119,7 +1148,9 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) # TODO: add multi parameter support - def _analyze_complexity(model_object: Model, eval_target: float, col: str) -> dict[str, str]: + def _analyze_complexity( + model_object: Model, eval_target: float, col: str + ) -> dict[str, str]: """Analyzes the complexity of a given Extra-P model by evaluating it for a given target scale and column (metric). Args: @@ -1176,7 +1207,9 @@ def _analyze_complexity(model_object: Model, eval_target: float, col: str) -> di return return_value # TODO: add multi parameter support - def complexity_statsframe(self, columns: list[str] = None, eval_target: float = None) -> None: + def complexity_statsframe( + self, columns: list[str] = None, eval_target: float = None + ) -> None: """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. Args: @@ -1185,7 +1218,7 @@ def complexity_statsframe(self, columns: list[str] = None, eval_target: float = Raises: Exception: Raises an exception if the target scale is not provided. - ValueError: Raises a ValueError is not in the aggregates statistics table. + ValueError: Raises a ValueError is not in the aggregates statistics table. TypeError: Raises a TypeError if the column is not of the right type. """ if eval_target is None: @@ -1278,7 +1311,9 @@ def complexity_statsframe(self, columns: list[str] = None, eval_target: float = all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) - def phase_statsframe(self, columns: list[str] = None, eval_target: float = None) -> DataFrame: + def phase_statsframe( + self, columns: list[str] = None, eval_target: float = None + ) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. Args: From 773adde48a1b6c9fe4f257ff9ae5751fd5c6111e Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 7 Sep 2023 11:26:38 -0700 Subject: [PATCH 23/57] added multi-parameter support for complexity_statsframe function --- thicket/model_extrap.py | 75 +++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index cc7f8512..cdb48d5e 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1147,15 +1147,14 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) - # TODO: add multi parameter support def _analyze_complexity( - model_object: Model, eval_target: float, col: str + model_object: Model, eval_target: list[float], col: str, parameters: list[str] ) -> dict[str, str]: """Analyzes the complexity of a given Extra-P model by evaluating it for a given target scale and column (metric). Args: model_object (Model): The Extra-P Model, which the complexity analysis should be performed for. - eval_target (float): The target scale for the evaluation. + eval_target (list[float]): The target scale for the evaluation. col (str): The column (metric) to evaluate for. Returns: @@ -1178,25 +1177,54 @@ def _analyze_complexity( return_value[col + "_coefficient"] = coefficient else: - for term in fnc.compound_terms: - result = term.evaluate(eval_target) - term_values.append(result) - terms.append(term) + if len(parameters) == 1: + for term in fnc.compound_terms: + result = term.evaluate(eval_target[0]) + term_values.append(result) + terms.append(term) + else: + for term in fnc.compound_terms: + result = term.evaluate(eval_target) + term_values.append(result) + terms.append(term) max_index = term_values.index(max(term_values)) if max(term_values) > fnc.constant_coefficient: comp = "" - for simple_term in terms[max_index].simple_terms: - if comp == "": - comp += simple_term.to_string() - else: - comp = comp + "*" + simple_term.to_string() - comp = comp.replace("^", "**") - complexity_class = "O(" + comp + ")" - coefficient = terms[max_index].coefficient - return_value[col + "_complexity"] = complexity_class - return_value[col + "_coefficient"] = coefficient + if len(parameters) == 1: + for simple_term in terms[max_index].simple_terms: + if comp == "": + comp += simple_term.to_string() + else: + comp = comp + "*" + simple_term.to_string() + comp = comp.replace("^", "**") + complexity_class = "O(" + comp + ")" + coefficient = terms[max_index].coefficient + return_value[col + "_complexity"] = complexity_class + return_value[col + "_coefficient"] = coefficient + else: + comp = "" + for parameter_term_pair in terms[max_index].parameter_term_pairs: + # [0] to get the index of the paramete + term_parameter_str = DEFAULT_PARAM_NAMES[parameter_term_pair[0]] + # [1] to get the term + if comp == "": + comp += parameter_term_pair[1].to_string( + parameter=term_parameter_str + ) + else: + comp = ( + comp + + "*" + + parameter_term_pair[1].to_string( + parameter=term_parameter_str + ) + ) + comp = comp.replace("^", "**") + complexity_class = "O(" + comp + ")" + return_value[col + "_complexity"] = complexity_class + return_value[col + "_coefficient"] = term.coefficient else: complexity_class = "O(1)" @@ -1206,15 +1234,14 @@ def _analyze_complexity( return return_value - # TODO: add multi parameter support def complexity_statsframe( - self, columns: list[str] = None, eval_target: float = None + self, columns: list[str] = None, eval_target: list[float] = None ) -> None: """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. Args: columns (list[str], optional): A list of columns (metrics) that should be considered. Defaults to None. - eval_target (float, optional): The target scale the evaluation should be done for. Defaults to None. + eval_target (list[float], optional): The target scale the evaluation should be done for. Defaults to None. Raises: Exception: Raises an exception if the target scale is not provided. @@ -1225,6 +1252,10 @@ def complexity_statsframe( raise Exception( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." ) + elif len(eval_target) != len(self.parameters): + raise Exception( + "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." + ) else: # Use all Extra-P columns if columns is None: @@ -1252,7 +1283,9 @@ def complexity_statsframe( for col in columns: # Get list of components for this column components = [ - Modeling._analyze_complexity(model_obj, eval_target, col) + Modeling._analyze_complexity( + model_obj, eval_target, col, self.parameters + ) for model_obj in self.tht.statsframe.dataframe[col] ] From 7ae9a36017a87cb142ccd5bfb1b7b240e7d0f966 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 7 Sep 2023 11:44:58 -0700 Subject: [PATCH 24/57] fixed F402 import 'node' from line 21 shadowed by loop variable --- thicket/model_extrap.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index cdb48d5e..fb3ce5e6 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -871,9 +871,9 @@ def produce_models( # create the callpaths # NOTE: could add calltree later on, possibly from hatchet data if available - for node, _ in self.tht.dataframe.groupby(level=0): - if Callpath(node.frame["name"]) not in experiment.callpaths: - experiment.add_callpath(Callpath(node.frame["name"])) + for thicket_node, _ in self.tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) not in experiment.callpaths: + experiment.add_callpath(Callpath(thicket_node.frame["name"])) # create the metrics for metric in self.metrics: From 42c300b9549c17449d00d3fd65ee56417c3f298b Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 7 Sep 2023 12:28:14 -0700 Subject: [PATCH 25/57] fixded F402 import 'node' from line 21 shadowed by loop variable --- thicket/model_extrap.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index fb3ce5e6..8723d070 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -890,8 +890,10 @@ def produce_models( values = [] callpath_exists = False # NOTE: potentially there is a better way to access the dataframes without looping - for node, single_node_df in self.tht.dataframe.groupby(level=0): - if Callpath(node.frame["name"]) == callpath: + for thicket_node, single_node_df in self.tht.dataframe.groupby( + level=0 + ): + if Callpath(thicket_node.frame["name"]) == callpath: callpath_exists = True coordinate_exists = False for profile, single_prof_df in single_node_df.groupby( @@ -993,7 +995,7 @@ def produce_models( ) if callpath_exists is False: raise ExtrapReaderException( - "The node/callpath '" + "The thicket node/callpath '" + str(callpath) + "' does not exist in any of the profiles.", profile, @@ -1036,17 +1038,17 @@ def produce_models( for callpath in experiment.callpaths: for metric in experiment.metrics: mkey = (callpath, metric) - for node, _ in self.tht.dataframe.groupby(level=0): - if Callpath(node.frame["name"]) == callpath: + for thicket_node, _ in self.tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) == callpath: # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them try: self.tht.statsframe.dataframe.at[ - node, str(metric) + MODEL_TAG + thicket_node, str(metric) + MODEL_TAG ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics(node, str(metric)) + self._add_extrap_statistics(thicket_node, str(metric)) except Exception: pass From 8bcc01e915c2c34bcd74be6eb7d3d9f1e2523307 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 7 Sep 2023 20:19:40 -0700 Subject: [PATCH 26/57] added basic code for color coding complexity classes in hatchet tree --- thicket/model_extrap.py | 103 ++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 25 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 8723d070..0f503a34 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -7,7 +7,7 @@ import copy from io import BytesIO -import matplotlib.pyplot as plt +import matplotlib as mpl from matplotlib.figure import Figure from matplotlib.axes import Axes from matplotlib import patches as mpatches @@ -192,8 +192,10 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -226,7 +228,8 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -298,7 +301,8 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -310,7 +314,8 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -399,7 +404,8 @@ def draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -431,7 +437,8 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -445,9 +452,11 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -546,17 +555,21 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -760,7 +773,8 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr @@ -776,7 +790,8 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -848,7 +863,8 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -919,7 +935,8 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -937,7 +954,8 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -970,7 +988,8 @@ def produce_models( + ".", profile, ) - values.append(value[0] * ranks) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1048,7 +1067,8 @@ def produce_models( ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics(thicket_node, str(metric)) + self._add_extrap_statistics( + thicket_node, str(metric)) except Exception: pass @@ -1076,7 +1096,8 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient else: @@ -1282,6 +1303,7 @@ def complexity_statsframe( # Process each column all_dfs = [] + all_dfs_columns = [] for col in columns: # Get list of components for this column components = [ @@ -1297,9 +1319,29 @@ def complexity_statsframe( ) # Add column name as index level - # comp_df.columns = "[col], comp_df.columns.to_list()" + all_dfs_columns.append(comp_df.columns) all_dfs.append(comp_df) + # add color coding for complexity classes to data frame + color_map_dict = {} + column_key = None + for col in all_dfs_columns[0].values: + if "model_complexity" in col: + column_key = col + break + unique_classes = [] + complexity_list = [] + for i in range(len(all_dfs[0][column_key])): + complexity_list.append(all_dfs[0][column_key][i]) + if all_dfs[0][column_key][i] not in unique_classes: + unique_classes.append(all_dfs[0][column_key][i]) + range_values = np.arange(0, 1, 1/len(unique_classes)) + cmap = mpl.cm.get_cmap('Spectral') + for i in range(len(range_values)): + range_val = range_values[i] + rgba = cmap(range_val) + color_map_dict[unique_classes[i]] = rgba + # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) @@ -1338,10 +1380,21 @@ def complexity_statsframe( data=ranking_list, index=self.tht.statsframe.dataframe.index ) - # Add column name as index level - # comp_df.columns = "[col], comp_df.columns.to_list()" all_dfs.append(comp_df) + color_list = [] + for i in range(len(complexity_list)): + color_list_dict = {} + color_list_dict[col + + "_colormapping"] = color_map_dict[complexity_list[i]] + color_list.append(color_list_dict) + + color_map_df = pd.DataFrame( + data=color_list, index=self.tht.statsframe.dataframe.index + ) + + all_dfs.append(color_map_df) + # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) From 1bd89c479417d926f55a3357fa3297ee77c79c2d Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 8 Sep 2023 13:25:41 -0700 Subject: [PATCH 27/57] added colormapping support for complexity analysis with tree function --- thicket/model_extrap.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 0f503a34..077ede49 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1336,11 +1336,20 @@ def complexity_statsframe( if all_dfs[0][column_key][i] not in unique_classes: unique_classes.append(all_dfs[0][column_key][i]) range_values = np.arange(0, 1, 1/len(unique_classes)) - cmap = mpl.cm.get_cmap('Spectral') + # cmap = mpl.cm.get_cmap('brg') + cmap = mpl.cm.get_cmap('tab20b') for i in range(len(range_values)): range_val = range_values[i] rgba = cmap(range_val) - color_map_dict[unique_classes[i]] = rgba + red = rgba[0] + green = rgba[1] + blue = rgba[2] + red = int(red / (1 / 255)) + green = int(green / (1 / 255)) + blue = int(blue / (1 / 255)) + ansi_color_str = "\033[38;2;" + \ + str(red)+";"+str(green)+";"+str(blue)+"m" + color_map_dict[unique_classes[i]] = ansi_color_str # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) From 0dc7d24f48c3d12f421ccaa553d7a57823748cc4 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 8 Sep 2023 13:59:00 -0700 Subject: [PATCH 28/57] black --- thicket/model_extrap.py | 77 ++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 077ede49..3f18efba 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -192,10 +192,8 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -228,8 +226,7 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -301,8 +298,7 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -314,8 +310,7 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -404,8 +399,7 @@ def draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -437,8 +431,7 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -452,11 +445,9 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -555,21 +546,17 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -773,8 +760,7 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -790,8 +776,7 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -863,8 +848,7 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -935,8 +919,7 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -954,8 +937,7 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -988,8 +970,7 @@ def produce_models( + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1067,8 +1048,7 @@ def produce_models( ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics( - thicket_node, str(metric)) + self._add_extrap_statistics(thicket_node, str(metric)) except Exception: pass @@ -1096,8 +1076,7 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + variable_column = " * ".join(t.to_string() for t in term.simple_terms) term_dict[variable_column] = term.coefficient else: @@ -1335,9 +1314,9 @@ def complexity_statsframe( complexity_list.append(all_dfs[0][column_key][i]) if all_dfs[0][column_key][i] not in unique_classes: unique_classes.append(all_dfs[0][column_key][i]) - range_values = np.arange(0, 1, 1/len(unique_classes)) + range_values = np.arange(0, 1, 1 / len(unique_classes)) # cmap = mpl.cm.get_cmap('brg') - cmap = mpl.cm.get_cmap('tab20b') + cmap = mpl.cm.get_cmap("tab20b") for i in range(len(range_values)): range_val = range_values[i] rgba = cmap(range_val) @@ -1347,8 +1326,9 @@ def complexity_statsframe( red = int(red / (1 / 255)) green = int(green / (1 / 255)) blue = int(blue / (1 / 255)) - ansi_color_str = "\033[38;2;" + \ - str(red)+";"+str(green)+";"+str(blue)+"m" + ansi_color_str = ( + "\033[38;2;" + str(red) + ";" + str(green) + ";" + str(blue) + "m" + ) color_map_dict[unique_classes[i]] = ansi_color_str # Concatenate dataframes horizontally @@ -1394,8 +1374,9 @@ def complexity_statsframe( color_list = [] for i in range(len(complexity_list)): color_list_dict = {} - color_list_dict[col + - "_colormapping"] = color_map_dict[complexity_list[i]] + color_list_dict[col + "_colormapping"] = color_map_dict[ + complexity_list[i] + ] color_list.append(color_list_dict) color_map_df = pd.DataFrame( From cb69c822cf8562cdec63daec391a02ff7c4bf627 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 8 Sep 2023 14:10:53 -0700 Subject: [PATCH 29/57] fixed F821 undefined name 'plt' --- thicket/model_extrap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 3f18efba..35574edd 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -8,6 +8,7 @@ from io import BytesIO import matplotlib as mpl +import matplotlib.pyplot as plt from matplotlib.figure import Figure from matplotlib.axes import Axes from matplotlib import patches as mpatches From 73a27d01c731e359b761c7e7a3209b23f742a7d0 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 8 Sep 2023 14:20:04 -0700 Subject: [PATCH 30/57] changed tuple type hints to be compatible with Python3.8 --- thicket/model_extrap.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 35574edd..8687cad9 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -7,6 +7,8 @@ import copy from io import BytesIO +from typing import Tuple + import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.figure import Figure @@ -207,7 +209,7 @@ def display_one_parameter_model( AR2: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, - ) -> tuple[Figure, Axes]: + ) -> Tuple[Figure, Axes]: """Display function to visualize performance models with one model parameter. Args: @@ -411,7 +413,7 @@ def display_two_parameter_model( AR2: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, - ) -> tuple[Figure, Axes]: + ) -> Tuple[Figure, Axes]: """Display function to visualize performance models with two model parameters. Args: @@ -623,7 +625,7 @@ def display( AR2: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, - ) -> tuple[Figure, Axes]: + ) -> Tuple[Figure, Axes]: """General display function for visualizing a performance model. Calls the specific display function depending on the number of found model parameters automatically. From 448995c78431f822b04a4e8809ecb5529fe06fe6 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 13 Sep 2023 12:13:44 -0700 Subject: [PATCH 31/57] load DEFAULT_PARAM_NAMES from extrap directly to improve compatability --- thicket/model_extrap.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 8687cad9..0cc37482 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -34,38 +34,10 @@ from extrap.entities.coordinate import Coordinate from extrap.entities.model import Model from extrap.entities.functions import Function +from extrap.entities.terms import DEFAULT_PARAM_NAMES MODEL_TAG = "_extrap-model" -DEFAULT_PARAM_NAMES = ( - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", -) - class ExtrapReaderException(Exception): """Custom exception class for raising exceptions while reading in data from From a31e453e012e00618c8fc78fe863ffb4bed31e25 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 13 Sep 2023 15:55:24 -0700 Subject: [PATCH 32/57] multi eval target support for complexity analysis statsframe function --- thicket/model_extrap.py | 387 +++++++++++++++++++++++----------------- 1 file changed, 224 insertions(+), 163 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 0cc37482..8930700a 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -167,8 +167,10 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -201,7 +203,8 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -273,7 +276,8 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -285,7 +289,8 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -374,7 +379,8 @@ def draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -406,7 +412,8 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -420,9 +427,11 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) @@ -440,7 +449,7 @@ def opt_scaling_func_user(p, q): plt.ioff() fig = plt.figure() - ax = fig.gca(projection="3d") + ax = fig.add_subplot(projection="3d") if show_opt_scaling: # if the user provides a custom scaling function @@ -521,17 +530,21 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -735,7 +748,8 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr @@ -751,7 +765,8 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. @@ -823,7 +838,8 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -894,7 +910,8 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -912,7 +929,8 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -945,7 +963,8 @@ def produce_models( + ".", profile, ) - values.append(value[0] * ranks) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1023,7 +1042,8 @@ def produce_models( ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics(thicket_node, str(metric)) + self._add_extrap_statistics( + thicket_node, str(metric)) except Exception: pass @@ -1051,7 +1071,8 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient else: @@ -1084,7 +1105,7 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: columns = [ col for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col][0], ModelWrapper) + if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) ] # Error checking @@ -1093,7 +1114,7 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: raise ValueError( "column " + c + " is not in the aggregated statistics table." ) - elif not isinstance(self.tht.statsframe.dataframe[c][0], ModelWrapper): + elif not isinstance(self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper): raise TypeError( "column " + c @@ -1147,11 +1168,18 @@ def _analyze_complexity( term_values = [] terms = [] + target_str = "(" + for param_value in eval_target: + target_str += str(param_value) + target_str += "," + target_str = target_str[:-1] + target_str += ")" + if len(fnc.compound_terms) == 0: - complexity_class = "O(1)" + complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity"] = complexity_class - return_value[col + "_coefficient"] = coefficient + return_value[col + "_complexity_" + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient else: if len(parameters) == 1: @@ -1176,10 +1204,12 @@ def _analyze_complexity( else: comp = comp + "*" + simple_term.to_string() comp = comp.replace("^", "**") - complexity_class = "O(" + comp + ")" + complexity_class = "" + comp + "" coefficient = terms[max_index].coefficient - return_value[col + "_complexity"] = complexity_class - return_value[col + "_coefficient"] = coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = coefficient else: comp = "" for parameter_term_pair in terms[max_index].parameter_term_pairs: @@ -1199,170 +1229,201 @@ def _analyze_complexity( ) ) comp = comp.replace("^", "**") - complexity_class = "O(" + comp + ")" - return_value[col + "_complexity"] = complexity_class - return_value[col + "_coefficient"] = term.coefficient + complexity_class = "" + comp + "" + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = term.coefficient else: - complexity_class = "O(1)" + complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity"] = complexity_class - return_value[col + "_coefficient"] = coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient return return_value + from typing import List + def complexity_statsframe( - self, columns: list[str] = None, eval_target: list[float] = None + self, columns: list[str] = None, eval_targets: list[list[float]] = None ) -> None: """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. Args: columns (list[str], optional): A list of columns (metrics) that should be considered. Defaults to None. - eval_target (list[float], optional): The target scale the evaluation should be done for. Defaults to None. + eval_targets (list[list(float)], optional): A list of target scales (parameter-value list) the evaluation should be done for. Defaults to None. Raises: Exception: Raises an exception if the target scale is not provided. ValueError: Raises a ValueError is not in the aggregates statistics table. TypeError: Raises a TypeError if the column is not of the right type. """ - if eval_target is None: + targets = [] + if eval_targets is None: raise Exception( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." ) - elif len(eval_target) != len(self.parameters): - raise Exception( - "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." - ) - else: - # Use all Extra-P columns - if columns is None: - columns = [ - col - for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col][0], ModelWrapper) - ] + elif len(eval_targets) > 0: - # Error checking - for c in columns: - if c not in self.tht.statsframe.dataframe.columns: - raise ValueError( - "column " + c + " is not in the aggregated statistics table." - ) - elif not isinstance(self.tht.statsframe.dataframe[c][0], ModelWrapper): - raise TypeError( - "column " - + c - + " is not the right type (thicket.model_extrap.ModelWrapper)." - ) + # for each evaluation target check if the number of values matches the number of parameters + for target in eval_targets: + if len(target) != len(self.parameters): + print( + "The number of given parameter values for the evaluation target need to be the same as the number of model parameters.") + else: + targets.append(target) + + # if there are targets to evaluate for + if len(targets) > 0: + + for target in targets: + + target_str = "(" + for param_value in target: + target_str += str(param_value) + target_str += "," + target_str = target_str[:-1] + target_str += ")" + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in self.tht.statsframe.dataframe + if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) + ] - # Process each column - all_dfs = [] - all_dfs_columns = [] - for col in columns: - # Get list of components for this column - components = [ - Modeling._analyze_complexity( - model_obj, eval_target, col, self.parameters - ) - for model_obj in self.tht.statsframe.dataframe[col] - ] + # Error checking + for c in columns: + if c not in self.tht.statsframe.dataframe.columns: + raise ValueError( + "column " + c + " is not in the aggregated statistics table." + ) + elif not isinstance(self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) - # Component dataframe - comp_df = pd.DataFrame( - data=components, index=self.tht.statsframe.dataframe.index - ) + # Process each column + all_dfs = [] + all_dfs_columns = [] + for col in columns: + # Get list of components for this column + components = [ + Modeling._analyze_complexity( + model_obj, target, col, self.parameters + ) + for model_obj in self.tht.statsframe.dataframe[col] + ] - # Add column name as index level - all_dfs_columns.append(comp_df.columns) - all_dfs.append(comp_df) - - # add color coding for complexity classes to data frame - color_map_dict = {} - column_key = None - for col in all_dfs_columns[0].values: - if "model_complexity" in col: - column_key = col - break - unique_classes = [] - complexity_list = [] - for i in range(len(all_dfs[0][column_key])): - complexity_list.append(all_dfs[0][column_key][i]) - if all_dfs[0][column_key][i] not in unique_classes: - unique_classes.append(all_dfs[0][column_key][i]) - range_values = np.arange(0, 1, 1 / len(unique_classes)) - # cmap = mpl.cm.get_cmap('brg') - cmap = mpl.cm.get_cmap("tab20b") - for i in range(len(range_values)): - range_val = range_values[i] - rgba = cmap(range_val) - red = rgba[0] - green = rgba[1] - blue = rgba[2] - red = int(red / (1 / 255)) - green = int(green / (1 / 255)) - blue = int(blue / (1 / 255)) - ansi_color_str = ( - "\033[38;2;" + str(red) + ";" + str(green) + ";" + str(blue) + "m" - ) - color_map_dict[unique_classes[i]] = ansi_color_str - - # Concatenate dataframes horizontally - all_dfs.insert(0, self.tht.statsframe.dataframe) - self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) - - # Add callpath ranking to the dataframe - all_dfs = [] - for col in columns: - total_metric_value = 0 - metric_values = [] - for model_obj in self.tht.statsframe.dataframe[col]: - metric_value = model_obj.mdl.hypothesis.function.evaluate( - eval_target + # Component dataframe + comp_df = pd.DataFrame( + data=components, index=self.tht.statsframe.dataframe.index ) - total_metric_value += metric_value - metric_values.append(metric_value) - percentages = [] - for value in metric_values: - percentage = value / (total_metric_value / 100) - if percentage < 0: - percentages.append(0) - else: - percentages.append(percentage) - reverse_ranking = len(percentages) - rankdata( - percentages, method="ordinal" - ).astype(int) - for i in range(len(reverse_ranking)): - reverse_ranking[i] += 1 - ranking_list = [] - for i in range(len(reverse_ranking)): - ranking_dict = {} - ranking_dict[col + "_growth_rank"] = reverse_ranking[i] - ranking_list.append(ranking_dict) - - # Component dataframe - comp_df = pd.DataFrame( - data=ranking_list, index=self.tht.statsframe.dataframe.index - ) - all_dfs.append(comp_df) + # Add column name as index level + all_dfs_columns.append(comp_df.columns) + all_dfs.append(comp_df) + + # add color coding for complexity classes to data frame + color_map_dict = {} + column_key = None + for col in all_dfs_columns[0].values: + if "model_complexity" in col: + column_key = col + break + unique_classes = [] + complexity_list = [] + for i in range(len(all_dfs[0][column_key])): + complexity_list.append(all_dfs[0][column_key].iloc[i]) + if all_dfs[0][column_key].iloc[i] not in unique_classes: + unique_classes.append(all_dfs[0][column_key].iloc[i]) + range_values = np.arange(0, 1, 1 / len(unique_classes)) + # cmap = mpl.cm.get_cmap('brg') + cmap = mpl.cm.get_cmap("tab20b") + for i in range(len(range_values)): + range_val = range_values[i] + rgba = cmap(range_val) + red = rgba[0] + green = rgba[1] + blue = rgba[2] + red = int(red / (1 / 255)) + green = int(green / (1 / 255)) + blue = int(blue / (1 / 255)) + ansi_color_str = ( + "\033[38;2;" + str(red) + ";" + + str(green) + ";" + str(blue) + "m" + ) + color_map_dict[unique_classes[i]] = ansi_color_str + + # Concatenate dataframes horizontally + all_dfs.insert(0, self.tht.statsframe.dataframe) + self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + # Add callpath ranking to the dataframe + all_dfs = [] + for col in columns: + total_metric_value = 0 + metric_values = [] + for model_obj in self.tht.statsframe.dataframe[col]: + metric_value = model_obj.mdl.hypothesis.function.evaluate( + target + ) + total_metric_value += metric_value + metric_values.append(metric_value) + percentages = [] + for value in metric_values: + percentage = value / (total_metric_value / 100) + if percentage < 0: + percentages.append(0) + else: + percentages.append(percentage) + reverse_ranking = len(percentages) - rankdata( + percentages, method="ordinal" + ).astype(int) + for i in range(len(reverse_ranking)): + reverse_ranking[i] += 1 + ranking_list = [] + for i in range(len(reverse_ranking)): + ranking_dict = {} + ranking_dict[col + "_growth_rank_" + + target_str] = reverse_ranking[i] + ranking_list.append(ranking_dict) + + # Component dataframe + comp_df = pd.DataFrame( + data=ranking_list, index=self.tht.statsframe.dataframe.index + ) - color_list = [] - for i in range(len(complexity_list)): - color_list_dict = {} - color_list_dict[col + "_colormapping"] = color_map_dict[ - complexity_list[i] - ] - color_list.append(color_list_dict) + all_dfs.append(comp_df) - color_map_df = pd.DataFrame( - data=color_list, index=self.tht.statsframe.dataframe.index - ) + color_list = [] + for i in range(len(complexity_list)): + color_list_dict = {} + color_list_dict[col + "_colormapping_" + target_str] = color_map_dict[ + complexity_list[i] + ] + color_list.append(color_list_dict) + + color_map_df = pd.DataFrame( + data=color_list, index=self.tht.statsframe.dataframe.index + ) - all_dfs.append(color_map_df) + all_dfs.append(color_map_df) - # Concatenate dataframes horizontally - all_dfs.insert(0, self.tht.statsframe.dataframe) - self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + # Concatenate dataframes horizontally + all_dfs.insert(0, self.tht.statsframe.dataframe) + self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + # otherwise rais Exception + else: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) def phase_statsframe( self, columns: list[str] = None, eval_target: float = None From dbe438ff2622c08b913f47a8cdae6e1031cace57 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 13 Sep 2023 18:07:14 -0700 Subject: [PATCH 33/57] fixed multi-parameter visualization bug for constant functions --- thicket/model_extrap.py | 130 +++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 69 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 8930700a..604b3b75 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -35,6 +35,7 @@ from extrap.entities.model import Model from extrap.entities.functions import Function from extrap.entities.terms import DEFAULT_PARAM_NAMES +from extrap.entities.functions import ConstantFunction MODEL_TAG = "_extrap-model" @@ -167,10 +168,8 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -203,8 +202,7 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -276,8 +274,7 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -289,8 +286,7 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -379,8 +375,7 @@ def draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) def display_two_parameter_model( self, @@ -412,8 +407,7 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -427,14 +421,21 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) - z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) + if isinstance(self.mdl.hypothesis.function, ConstantFunction) is True: + zy = [] + for i in range(len(x_vals)): + zx = [] + for j in range(len(x_vals[0])): + zx.append(self.mdl.hypothesis.function.evaluate([x_vals, y_vals])) + zy.append(zx) + z_vals = np.reshape(zy, (len(x_vals), len(y_vals))).T + else: + z_vals = self.mdl.hypothesis.function.evaluate([x_vals, y_vals]) # opt. scaling function used for auto. detection def opt_scaling_func_auto(x, y): @@ -530,21 +531,17 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -748,8 +745,7 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -765,12 +761,9 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected - # TODO: to_html(escape=False, formatters=frm_dict), the formatter does not work for 3D stuff. - # need to find a workaround return self.tht.statsframe.dataframe[ [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) @@ -838,8 +831,7 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -910,8 +902,7 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -929,8 +920,7 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -963,8 +953,7 @@ def produce_models( + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1042,8 +1031,7 @@ def produce_models( ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics( - thicket_node, str(metric)) + self._add_extrap_statistics(thicket_node, str(metric)) except Exception: pass @@ -1071,8 +1059,7 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + variable_column = " * ".join(t.to_string() for t in term.simple_terms) term_dict[variable_column] = term.coefficient else: @@ -1206,10 +1193,8 @@ def _analyze_complexity( comp = comp.replace("^", "**") complexity_class = "" + comp + "" coefficient = terms[max_index].coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = coefficient + return_value[col + "_complexity_" + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient else: comp = "" for parameter_term_pair in terms[max_index].parameter_term_pairs: @@ -1230,16 +1215,13 @@ def _analyze_complexity( ) comp = comp.replace("^", "**") complexity_class = "" + comp + "" - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = term.coefficient + return_value[col + "_complexity_" + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = term.coefficient else: complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class + return_value[col + "_complexity_" + target_str] = complexity_class return_value[col + "_coefficient_" + target_str] = coefficient return return_value @@ -1266,20 +1248,18 @@ def complexity_statsframe( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." ) elif len(eval_targets) > 0: - # for each evaluation target check if the number of values matches the number of parameters for target in eval_targets: if len(target) != len(self.parameters): print( - "The number of given parameter values for the evaluation target need to be the same as the number of model parameters.") + "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." + ) else: targets.append(target) # if there are targets to evaluate for if len(targets) > 0: - for target in targets: - target_str = "(" for param_value in target: target_str += str(param_value) @@ -1292,16 +1272,22 @@ def complexity_statsframe( columns = [ col for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) + if isinstance( + self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper + ) ] # Error checking for c in columns: if c not in self.tht.statsframe.dataframe.columns: raise ValueError( - "column " + c + " is not in the aggregated statistics table." + "column " + + c + + " is not in the aggregated statistics table." ) - elif not isinstance(self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper): + elif not isinstance( + self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper + ): raise TypeError( "column " + c @@ -1355,8 +1341,13 @@ def complexity_statsframe( green = int(green / (1 / 255)) blue = int(blue / (1 / 255)) ansi_color_str = ( - "\033[38;2;" + str(red) + ";" + - str(green) + ";" + str(blue) + "m" + "\033[38;2;" + + str(red) + + ";" + + str(green) + + ";" + + str(blue) + + "m" ) color_map_dict[unique_classes[i]] = ansi_color_str @@ -1390,8 +1381,9 @@ def complexity_statsframe( ranking_list = [] for i in range(len(reverse_ranking)): ranking_dict = {} - ranking_dict[col + "_growth_rank_" + - target_str] = reverse_ranking[i] + ranking_dict[ + col + "_growth_rank_" + target_str + ] = reverse_ranking[i] ranking_list.append(ranking_dict) # Component dataframe @@ -1404,9 +1396,9 @@ def complexity_statsframe( color_list = [] for i in range(len(complexity_list)): color_list_dict = {} - color_list_dict[col + "_colormapping_" + target_str] = color_map_dict[ - complexity_list[i] - ] + color_list_dict[ + col + "_colormapping_" + target_str + ] = color_map_dict[complexity_list[i]] color_list.append(color_list_dict) color_map_df = pd.DataFrame( From af2e64ed55b3df386e4a48b63b17cc3559d93b9e Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 14 Sep 2023 16:07:15 -0700 Subject: [PATCH 34/57] removed color coding information after adding it into hatchet codebase --- thicket/model_extrap.py | 144 ++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 81 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 604b3b75..6641862f 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -168,8 +168,10 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("*", "\\cdot") scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace("log2{p}", "\\log_2(p)") - scientific_function = scientific_function.replace("log2{q}", "\\log_2(q)") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -202,7 +204,8 @@ def display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted( + self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -274,7 +277,8 @@ def display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -286,7 +290,8 @@ def display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, + label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." @@ -326,13 +331,14 @@ def display_one_parameter_model( return fig, ax def draw_legend( - self, axis: Axes, dict_callpath_color: dict[str, list[str]] + self, axis: Axes, dict_callpath_color: dict[str, list[str]], function_char_len: int ) -> None: """This method draws a legend for 3D plots. Args: axis (_type_): The matplotlib axis of a figure object. dict_callpath_color (dict): The color/marker dict for the elements displayed in the plot. + function_char_len (int): The number of chars of the model string. """ handles = list() @@ -375,7 +381,8 @@ def draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", bbox_to_anchor=(2.75, 0.5)) + axis.legend(handles=handles, loc="center right", + bbox_to_anchor=(1.75+(function_char_len)*0.01, 0.5)) def display_two_parameter_model( self, @@ -407,7 +414,8 @@ def display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -421,9 +429,11 @@ def display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) if isinstance(self.mdl.hypothesis.function, ConstantFunction) is True: @@ -431,7 +441,8 @@ def display_two_parameter_model( for i in range(len(x_vals)): zx = [] for j in range(len(x_vals[0])): - zx.append(self.mdl.hypothesis.function.evaluate([x_vals, y_vals])) + zx.append(self.mdl.hypothesis.function.evaluate( + [x_vals, y_vals])) zy.append(zx) z_vals = np.reshape(zy, (len(x_vals), len(y_vals))).T else: @@ -531,17 +542,21 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, + c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, + c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, + c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append( + np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title @@ -594,7 +609,8 @@ def opt_scaling_func_user(p, q): ) # draw the legend - self.draw_legend(ax, dict_callpath_color) + self.draw_legend(ax, dict_callpath_color, + len(str(scientific_function))) return fig, ax @@ -745,7 +761,8 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format(figdata_jpg) + imgstr = ''.format( + figdata_jpg) plt.close(fig) return imgstr @@ -761,7 +778,8 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected return self.tht.statsframe.dataframe[ @@ -831,7 +849,8 @@ def produce_models( experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list(self.tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list( + self.tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -902,7 +921,8 @@ def produce_models( ): coordinate_exists = True try: - value = single_prof_df[str(metric)].tolist() + value = single_prof_df[str( + metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -920,7 +940,8 @@ def produce_models( # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int(scaling_parameter) + ranks = int( + scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -953,7 +974,8 @@ def produce_models( + ".", profile, ) - values.append(value[0] * ranks) + values.append( + value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1031,7 +1053,8 @@ def produce_models( ] = ModelWrapper(model_gen.models[mkey], self.parameters) # Add statistics to aggregated statistics table if add_stats: - self._add_extrap_statistics(thicket_node, str(metric)) + self._add_extrap_statistics( + thicket_node, str(metric)) except Exception: pass @@ -1059,7 +1082,8 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() for t in term.simple_terms) + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) term_dict[variable_column] = term.coefficient else: @@ -1193,8 +1217,10 @@ def _analyze_complexity( comp = comp.replace("^", "**") complexity_class = "" + comp + "" coefficient = terms[max_index].coefficient - return_value[col + "_complexity_" + target_str] = complexity_class - return_value[col + "_coefficient_" + target_str] = coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = coefficient else: comp = "" for parameter_term_pair in terms[max_index].parameter_term_pairs: @@ -1215,13 +1241,16 @@ def _analyze_complexity( ) comp = comp.replace("^", "**") complexity_class = "" + comp + "" - return_value[col + "_complexity_" + target_str] = complexity_class - return_value[col + "_coefficient_" + target_str] = term.coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = term.coefficient else: complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + target_str] = complexity_class + return_value[col + "_complexity_" + + target_str] = complexity_class return_value[col + "_coefficient_" + target_str] = coefficient return return_value @@ -1315,42 +1344,6 @@ def complexity_statsframe( all_dfs_columns.append(comp_df.columns) all_dfs.append(comp_df) - # add color coding for complexity classes to data frame - color_map_dict = {} - column_key = None - for col in all_dfs_columns[0].values: - if "model_complexity" in col: - column_key = col - break - unique_classes = [] - complexity_list = [] - for i in range(len(all_dfs[0][column_key])): - complexity_list.append(all_dfs[0][column_key].iloc[i]) - if all_dfs[0][column_key].iloc[i] not in unique_classes: - unique_classes.append(all_dfs[0][column_key].iloc[i]) - range_values = np.arange(0, 1, 1 / len(unique_classes)) - # cmap = mpl.cm.get_cmap('brg') - cmap = mpl.cm.get_cmap("tab20b") - for i in range(len(range_values)): - range_val = range_values[i] - rgba = cmap(range_val) - red = rgba[0] - green = rgba[1] - blue = rgba[2] - red = int(red / (1 / 255)) - green = int(green / (1 / 255)) - blue = int(blue / (1 / 255)) - ansi_color_str = ( - "\033[38;2;" - + str(red) - + ";" - + str(green) - + ";" - + str(blue) - + "m" - ) - color_map_dict[unique_classes[i]] = ansi_color_str - # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) @@ -1393,25 +1386,11 @@ def complexity_statsframe( all_dfs.append(comp_df) - color_list = [] - for i in range(len(complexity_list)): - color_list_dict = {} - color_list_dict[ - col + "_colormapping_" + target_str - ] = color_map_dict[complexity_list[i]] - color_list.append(color_list_dict) - - color_map_df = pd.DataFrame( - data=color_list, index=self.tht.statsframe.dataframe.index - ) - - all_dfs.append(color_map_df) - # Concatenate dataframes horizontally all_dfs.insert(0, self.tht.statsframe.dataframe) self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) - # otherwise rais Exception + # otherwise raise an Exception else: raise Exception( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." @@ -1466,4 +1445,7 @@ def phase_statsframe( # TODO: how to return the data back, because pandas can't aggregate functions with each other, # so there is no point in introducing an extra column type(MPI,comp) to group by that... - return self.tht.statsframe.dataframe + d = {'col1': [1, 2], 'col2': [3, 4]} + df = pd.DataFrame(data=d) + + return df From fdb1a9f25ae43c012cb2d8663f0e3e33ff17263b Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 15 Sep 2023 16:37:22 -0700 Subject: [PATCH 35/57] added basic implementation for multi_display function and phase analysis --- thicket/model_extrap.py | 300 ++++++++++++++++++++++++++++++++++------ 1 file changed, 260 insertions(+), 40 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 6641862f..c8a7abdf 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1397,55 +1397,275 @@ def complexity_statsframe( ) def phase_statsframe( - self, columns: list[str] = None, eval_target: float = None + self, columns: list[str] = None, add_stats=True ) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. - Args: - columns (list[str], optional): The list of columns (metrics) that should be considered or the phase analysis. Defaults to None. - eval_target (float, optional): The target evaluation scale for the phase analysis. Defaults to None. - - Raises: - Exception: Raises an exception if the target scale is not provided. - Returns: DataFrame: A thicket DataFrame that contains only the phase results. """ - if eval_target is None: - raise Exception( - "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." - ) - else: - # Use all Extra-P columns - if columns is None: - columns = [ - col - for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col][0], ModelWrapper) - ] - - print("columns:", columns) - - callpaths = self.tht.statsframe.dataframe["name"].values.tolist() - print("callpaths:", callpaths) - - communication = {} - computation = {} - for i in range(len(callpaths)): - if "MPI" in callpaths[i]: - communication[callpaths[i]] = i + + """# Use all Extra-P columns + if columns is None: + columns = [ + col + for col in self.tht.statsframe.dataframe + if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) + ]""" + + # TODO: should either use all metrics available in the data frame or use the one provided by the user... + print("columns:", columns) + + # differentiate between computation and communication functions + # NOTE: this code could be more sophisticated but also depends on the application being analyzed + callpaths = self.tht.statsframe.dataframe["name"].values.tolist() + communication = {} + computation = {} + for i in range(len(callpaths)): + if "MPI" in callpaths[i]: + communication[callpaths[i]] = i + else: + computation[callpaths[i]] = i + + print("communication:", communication) + print("computation:", computation) + + # aggregate measurements inside the extra-p models from all communication functions + comm_measurements = {} + for _, value in communication.items(): + measurement_list = self.tht.statsframe.dataframe[ + "Avg time/rank (exc)_extrap-model"].iloc[value].mdl.measurements + for i in range(len(measurement_list)): + measurement_list[i].coordinate + measurement_list[i].median + if measurement_list[i].coordinate not in comm_measurements: + comm_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median + else: + comm_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + + # aggregate measurements inside the extra-p models from all computation functions + comp_measurements = {} + for _, value in computation.items(): + measurement_list = self.tht.statsframe.dataframe[ + "Avg time/rank (exc)_extrap-model"].iloc[value].mdl.measurements + for i in range(len(measurement_list)): + measurement_list[i].coordinate + measurement_list[i].median + if measurement_list[i].coordinate not in comp_measurements: + comp_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median else: - computation[callpaths[i]] = i + comp_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + + print("comm_measurements:", comm_measurements) + print("comp_measurements:", comp_measurements) + + # create a new Extra-P experiment, one for each phase model + experiment = Experiment() + # Parameter() + # experiment.add_parameter(Parameter(parameter)) + + # TODO: automtically + metric = Metric("Avg time/rank (exc)_extrap-model") + experiment.add_metric(metric) + + com_callpath = Callpath("communication") + experiment.add_callpath(com_callpath) + comp_callpath = Callpath("computation") + experiment.add_callpath(comp_callpath) + + for i in range(len(next(iter(comp_measurements)))): + experiment.add_parameter( + Parameter(str(DEFAULT_PARAM_NAMES[i]))) + + for key, value in comp_measurements.items(): + if key not in experiment.coordinates: + experiment.add_coordinate(key) + measurement = Measurement( + key, comp_callpath, metric, value) + experiment.add_measurement(measurement) + + for key, value in comm_measurements.items(): + if key not in experiment.coordinates: + experiment.add_coordinate(key) + measurement = Measurement( + key, com_callpath, metric, value) + experiment.add_measurement(measurement) + + # create models using the new experiment for aggregated functions + model_gen = ModelGenerator( + experiment, name="Default Model", use_median=True + ) + model_gen.model_all() + experiment.add_modeler(model_gen) + + # create new empty thicket dataframe + df = DataFrame() + + # create deepcopy of thicket + new_thicket = self.tht.deepcopy() + # x = new_thicket.statsframe.dataframe + # print(new_thicket) + # print(x) + + """d = {"node": [{"name": "communication", "type": "function"}, { + "name": "computation", "type": "function"}], "name": ["communication", "computation"]}""" + dx = pd.DataFrame({"name": ["computation"], "type": ["function"]}) + # dx = pd.DataFrame(data=x) + print(dx) + dy = pd.DataFrame({"name": ["communication"], "type": ["function"]}) + # dy = pd.DataFrame(data=y) + + d = {"node": [dx, dy], "name": ["communication", "computation"]} + df = pd.DataFrame(data=d) + print(df) + + """from collections import defaultdict + data_dict = defaultdict(list) + data_dict["node"].append(matches[0])""" + + import thicket.helpers as helpers + new_thicket.statsframe.dataframe = helpers._new_statsframe_df( + df) + print(new_thicket.statsframe.dataframe) + + # add the models in the new data frame + params = [str(p) for p in experiment.parameters] + for callpath in experiment.callpaths: + for metric in experiment.metrics: + mkey = (callpath, metric) + for thicket_node, _ in self.tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) == callpath: + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + try: + # TODO: need to be replaced with the new statsframe that I need to create + self.tht.statsframe.dataframe.at[ + thicket_node, str(metric) + MODEL_TAG + ] = ModelWrapper(model_gen.models[mkey], params) + # Add statistics to aggregated statistics table + if add_stats: + self._add_extrap_statistics( + thicket_node, str(metric)) + except Exception: + pass + + # TODO: how to return the data back, because pandas can't aggregate functions with each other, + # so there is no point in introducing an extra column type(MPI,comp) to group by that... + + d = {'col1': [1, 2], 'col2': [3, 4]} + df = pd.DataFrame(data=d) + + return df + + +def multi_display_one_parameter_model(model_objects): + pass + + +def multi_display_two_parameter_model(model_objects): + + plt.ioff() + fig = plt.figure() + ax = fig.add_subplot(projection="3d") - print("communication:", communication) - print("computation:", computation) + # create dict for legend color and markers + dict_callpath_color = {} - # TODO: aggregate the functions for both types and come up with one that describes all of them + # sort based on x and y values + measures_sorted = sorted( + model_objects[0].mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) + ) - # TODO: how to return the data back, because pandas can't aggregate functions with each other, - # so there is no point in introducing an extra column type(MPI,comp) to group by that... + # get x, y value from measurements + X_params = [ms.coordinate[0] for ms in measures_sorted] # X values + Y_params = [ms.coordinate[1] for ms in measures_sorted] # Y values - d = {'col1': [1, 2], 'col2': [3, 4]} - df = pd.DataFrame(data=d) + # get median, mean, min, and max values + # medians = [ms.median for ms in measures_sorted] + # means = [ms.mean for ms in measures_sorted] + # mins = [ms.minimum for ms in measures_sorted] + # maxes = [ms.maximum for ms in measures_sorted] + + # x value plotting range. Dynamic based off what the largest/smallest values are + x_vals = np.linspace( + start=X_params[0], stop=1.5 * X_params[-1], num=100) + # y value plotting range. Dynamic based off what the largest/smallest values are + y_vals = np.linspace( + start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + + x_vals, y_vals = np.meshgrid(x_vals, y_vals) + + for i in range(len(model_objects)): + + # sort based on x and y values + measures_sorted = sorted( + model_objects[i].mdl.measurements, key=lambda x: ( + x.coordinate[0], x.coordinate[1]) + ) + + if isinstance(model_objects[i].mdl.hypothesis.function, ConstantFunction) is True: + zy = [] + for i in range(len(x_vals)): + zx = [] + for j in range(len(x_vals[0])): + zx.append(model_objects[i].mdl.hypothesis.function.evaluate( + [x_vals, y_vals])) + zy.append(zx) + z_vals = np.reshape(zy, (len(x_vals), len(y_vals))).T + else: + z_vals = model_objects[i].mdl.hypothesis.function.evaluate([ + x_vals, y_vals]) + + ax.plot_surface( + x_vals, + y_vals, + z_vals, + label=str(model_objects[i].mdl.hypothesis.function), + rstride=1, + cstride=1, + antialiased=False, + alpha=0.1, + color="blue", + ) + + # create scientific representation of create performance model + # scientific_function = convert_function_to_scientific_notation( + # self.mdl.hypothesis.function + # ) + + # dict_callpath_color[str(scientific_function)] = ["surface", "blue"] + + # axis labels and title + ax.set_xlabel(model_objects[0].parameters[0] + " $p$") + ax.set_ylabel(model_objects[0].parameters[1] + " $q$") + ax.set_zlabel(model_objects[0].mdl.metric) + # TODO: do we need a title for this plots? + # ax.set_title(str(model_objects[0].mdl.callpath) + "()") + + # draw the legend + # draw_legend(ax, dict_callpath_color, len(str(scientific_function))) + + return fig, ax + + +def multi_display(model_objects): + # check number of model parameters + if len(model_objects[0].parameters) == 1: + fig, ax = multi_display_one_parameter_model(model_objects) + + elif len(model_objects[0].parameters) == 2: + fig, ax = multi_display_two_parameter_model(model_objects) + + else: + raise Exception( + "Plotting performance models with " + + str(len(model_objects[0].parameters)) + + " parameters is currently not supported." + ) - return df + return fig, ax From e1e86497cd827d1206d6e57e945f0ddfde29dd4a Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 18 Sep 2023 17:26:14 -0700 Subject: [PATCH 36/57] multi display method working for multi-parameter models --- thicket/model_extrap.py | 259 ++++++++++++++++++++++++++++++---------- 1 file changed, 198 insertions(+), 61 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index c8a7abdf..ba3bfce4 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1568,87 +1568,77 @@ def multi_display_one_parameter_model(model_objects): def multi_display_two_parameter_model(model_objects): - plt.ioff() - fig = plt.figure() - ax = fig.add_subplot(projection="3d") + parameters = model_objects[0].parameters + + functions = [] + for model_object in model_objects: + functions.append(model_object.mdl.hypothesis.function) + + # create scientific representation of created performance models + scientific_functions = convert_functions_to_scientific_notations( + functions + ) + + # chose the color map to take the colors from dynamically + range_values = np.arange( + 0, 1, 1 / len(model_objects)) + cmap = mpl.cm.get_cmap("brg") + rgbas = [] + for value in range_values: + rgba = cmap(value) + rgbas.append(rgba) # create dict for legend color and markers dict_callpath_color = {} + function_char_len = 0 + for i in range(len(scientific_functions)): + dict_callpath_color[str(model_objects[i].mdl.callpath)+": "+str(scientific_functions[i])] = [ + "surface", rgbas[i]] + if i == 0: + function_char_len = len(str(scientific_functions[i])) + else: + if len(str(scientific_functions[i])) > function_char_len: + function_char_len = len(str(scientific_functions[i])) + + plt.ioff() + fig = plt.figure() + ax = fig.add_subplot(projection='3d') # sort based on x and y values measures_sorted = sorted( model_objects[0].mdl.measurements, key=lambda x: ( x.coordinate[0], x.coordinate[1]) ) - - # get x, y value from measurements - X_params = [ms.coordinate[0] for ms in measures_sorted] # X values - Y_params = [ms.coordinate[1] for ms in measures_sorted] # Y values - - # get median, mean, min, and max values - # medians = [ms.median for ms in measures_sorted] - # means = [ms.mean for ms in measures_sorted] - # mins = [ms.minimum for ms in measures_sorted] - # maxes = [ms.maximum for ms in measures_sorted] - - # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) - # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) - - x_vals, y_vals = np.meshgrid(x_vals, y_vals) - - for i in range(len(model_objects)): - - # sort based on x and y values - measures_sorted = sorted( - model_objects[i].mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) - ) - - if isinstance(model_objects[i].mdl.hypothesis.function, ConstantFunction) is True: - zy = [] - for i in range(len(x_vals)): - zx = [] - for j in range(len(x_vals[0])): - zx.append(model_objects[i].mdl.hypothesis.function.evaluate( - [x_vals, y_vals])) - zy.append(zx) - z_vals = np.reshape(zy, (len(x_vals), len(y_vals))).T - else: - z_vals = model_objects[i].mdl.hypothesis.function.evaluate([ - x_vals, y_vals]) - + X_params = [ms.coordinate[0] for ms in measures_sorted] + Y_params = [ms.coordinate[1] for ms in measures_sorted] + maxX = 1.5 * X_params[-1] + maxY = 1.5 * Y_params[-1] + X, Y, Z_List, z_List = calculate_z_models( + maxX, maxY, model_objects, parameters) + + for i in range(len(Z_List)): ax.plot_surface( - x_vals, - y_vals, - z_vals, - label=str(model_objects[i].mdl.hypothesis.function), + X, Y, Z_List[i], label="optimal scaling", rstride=1, cstride=1, antialiased=False, - alpha=0.1, - color="blue", - ) - - # create scientific representation of create performance model - # scientific_function = convert_function_to_scientific_notation( - # self.mdl.hypothesis.function - # ) - - # dict_callpath_color[str(scientific_function)] = ["surface", "blue"] + alpha=0.3, color=rgbas[i]) # axis labels and title ax.set_xlabel(model_objects[0].parameters[0] + " $p$") ax.set_ylabel(model_objects[0].parameters[1] + " $q$") ax.set_zlabel(model_objects[0].mdl.metric) - # TODO: do we need a title for this plots? - # ax.set_title(str(model_objects[0].mdl.callpath) + "()") # draw the legend - # draw_legend(ax, dict_callpath_color, len(str(scientific_function))) + handles = list() + for key, value in dict_callpath_color.items(): + labelName = str(key) + if value[0] == "surface": + patch = mpatches.Patch(color=value[1], label=labelName) + handles.append(patch) + + ax.legend(handles=handles, loc="center right", + bbox_to_anchor=(2+(function_char_len)*0.01, 0.5)) return fig, ax @@ -1669,3 +1659,150 @@ def multi_display(model_objects): ) return fig, ax + + +def calculate_z_models(maxX, maxY, model_list, parameters, max_z=0): + # define grid parameters based on max x and max y value + pixelGap_x, pixelGap_y = calculate_grid_parameters(maxX, maxY) + # Get the grid of the x and y values + x = np.arange(1.0, maxX, pixelGap_x) + y = np.arange(1.0, maxY, pixelGap_y) + X, Y = np.meshgrid(x, y) + # Get the z value for the x and y value + z_List = list() + Z_List = list() + previous = np.seterr(invalid='ignore', divide='ignore') + for model in model_list: + function = model.mdl.hypothesis.function + zs = calculate_z_optimized(X, Y, function, parameters, maxX, maxY) + Z = zs.reshape(X.shape) + z_List.append(zs) + Z_List.append(Z) + max_z = max(max_z, np.max(zs[np.logical_not(np.isinf(zs))])) + np.seterr(**previous) + for z, Z in zip(z_List, Z_List): + z[np.isinf(z)] = max_z + Z[np.isinf(Z)] = max_z + return X, Y, Z_List, z_List + + +def calculate_grid_parameters(maxX, maxY): + number_of_pixels_x = 50 + number_of_pixels_y = 50 + + pixel_gap_x = getPixelGap(0, maxX, number_of_pixels_x) + pixel_gap_y = getPixelGap(0, maxY, number_of_pixels_y) + return pixel_gap_x, pixel_gap_y + + +def getPixelGap(lowerlimit, upperlimit, numberOfPixels): + """ + This function calculate the gap in pixels based on number of pixels and max value + """ + pixelGap = (upperlimit - lowerlimit) / numberOfPixels + return pixelGap + + +def calculate_z_optimized(X, Y, function, parameters, maxX, maxY): + """ + This function evaluates the function passed to it. + """ + xs, ys = X.reshape(-1), Y.reshape(-1) + points = np.ndarray( + (len(parameters), len(xs))) + + points[0] = maxX + points[1] = maxY + param1 = 0 + param2 = 1 + if param1 >= 0: + points[param1] = xs + if param2 >= 0: + points[param2] = ys + + z_value = function.evaluate(points) + return z_value + + +def convert_functions_to_scientific_notations(model_functions: list[Function]) -> list[str]: + """This function converts the created performance model function into a + scientific notation in string format. + + Args: + list[model_function] (Extra-P Model): The Extra-P Model object list containing the scaling functions. + + Returns: + list[str]: The resulting scientific version of the performance functions in a list. + """ + + scientific_functions = [] + for model_function in model_functions: + + function_terms = len(model_function.compound_terms) + model_copy = copy.deepcopy(model_function) + model_copy.constant_coefficient = ( + convert_coefficient_to_science_notation( + model_function.constant_coefficient + ) + ) + for i in range(function_terms): + model_copy.compound_terms[ + i + ].coefficient = convert_coefficient_to_science_notation( + model_function.compound_terms[i].coefficient + ) + scientific_function = str(model_copy) + scientific_function = scientific_function.replace("+-", "-") + scientific_function = scientific_function.replace("+ -", "-") + scientific_function = scientific_function.replace("*", "\\cdot") + scientific_function = scientific_function.replace("(", "{") + scientific_function = scientific_function.replace(")", "}") + scientific_function = scientific_function.replace( + "log2{p}", "\\log_2(p)") + scientific_function = scientific_function.replace( + "log2{q}", "\\log_2(q)") + scientific_function = "$" + scientific_function + "$" + scientific_functions.append(scientific_function) + return scientific_functions + + +def convert_coefficient_to_science_notation(coefficient: float) -> str: + """This function converts an Extra-P model coefficient into scientific + notation and returns it as a string. It also shortes the coefficients + to three decimal places. + + Args: + coefficient (float): A model coefficient from a Extra-P function. + + Returns: + str: The coefficient in scientific notation. + """ + f = mticker.ScalarFormatter(useMathText=True) + f.set_powerlimits((-3, 3)) + x = "{}".format(f.format_data(float(coefficient))) + terms = x.split(" ") + if not terms[0][:1].isnumeric(): + coeff = terms[0][1:] + try: + coeff = "{:.3f}".format(float(coeff)) + except ValueError: + pass + new_coeff = "" + new_coeff += "-" + new_coeff += coeff + for i in range(len(terms)): + if i != 0: + new_coeff += terms[i] + return new_coeff + else: + coeff = terms[0] + try: + coeff = "{:.3f}".format(float(coeff)) + except ValueError: + pass + new_coeff = "" + new_coeff += coeff + for i in range(len(terms)): + if i != 0: + new_coeff += terms[i] + return new_coeff From 858ba7af65df511e67332419a913492df836d6e6 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 18 Sep 2023 18:32:21 -0700 Subject: [PATCH 37/57] produce_aggregated_model function is working --- thicket/model_extrap.py | 220 +++++++++++++++------------------------- 1 file changed, 81 insertions(+), 139 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index ba3bfce4..4f4fe1ee 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1396,104 +1396,53 @@ def complexity_statsframe( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." ) - def phase_statsframe( - self, columns: list[str] = None, add_stats=True - ) -> DataFrame: + def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. - - Returns: - DataFrame: A thicket DataFrame that contains only the phase results. """ - """# Use all Extra-P columns - if columns is None: - columns = [ - col - for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) - ]""" - - # TODO: should either use all metrics available in the data frame or use the one provided by the user... - print("columns:", columns) - - # differentiate between computation and communication functions - # NOTE: this code could be more sophisticated but also depends on the application being analyzed callpaths = self.tht.statsframe.dataframe["name"].values.tolist() - communication = {} - computation = {} - for i in range(len(callpaths)): - if "MPI" in callpaths[i]: - communication[callpaths[i]] = i - else: - computation[callpaths[i]] = i - - print("communication:", communication) - print("computation:", computation) # aggregate measurements inside the extra-p models from all communication functions - comm_measurements = {} - for _, value in communication.items(): - measurement_list = self.tht.statsframe.dataframe[ - "Avg time/rank (exc)_extrap-model"].iloc[value].mdl.measurements - for i in range(len(measurement_list)): - measurement_list[i].coordinate - measurement_list[i].median - if measurement_list[i].coordinate not in comm_measurements: - comm_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median - else: - comm_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median - - # aggregate measurements inside the extra-p models from all computation functions - comp_measurements = {} - for _, value in computation.items(): - measurement_list = self.tht.statsframe.dataframe[ - "Avg time/rank (exc)_extrap-model"].iloc[value].mdl.measurements - for i in range(len(measurement_list)): - measurement_list[i].coordinate - measurement_list[i].median - if measurement_list[i].coordinate not in comp_measurements: - comp_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median - else: - comp_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median + agg_measurements_list = [] + for metric in self.metrics: + agg_measurements = {} + for i in range(len(callpaths)): + measurement_list = self.tht.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i].mdl.measurements + for i in range(len(measurement_list)): + measurement_list[i].coordinate + measurement_list[i].median + if measurement_list[i].coordinate not in agg_measurements: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + agg_measurements_list.append(agg_measurements) - print("comm_measurements:", comm_measurements) - print("comp_measurements:", comp_measurements) + print("agg_measurements:", agg_measurements) # create a new Extra-P experiment, one for each phase model experiment = Experiment() - # Parameter() - # experiment.add_parameter(Parameter(parameter)) - # TODO: automtically - metric = Metric("Avg time/rank (exc)_extrap-model") - experiment.add_metric(metric) + for metric in self.metrics: + metric = Metric(str(metric)) + experiment.add_metric(metric) - com_callpath = Callpath("communication") - experiment.add_callpath(com_callpath) - comp_callpath = Callpath("computation") - experiment.add_callpath(comp_callpath) + aggregated_callpath = Callpath("aggregated_nodes") + experiment.add_callpath(aggregated_callpath) - for i in range(len(next(iter(comp_measurements)))): + for i in range(len(next(iter(agg_measurements)))): experiment.add_parameter( Parameter(str(DEFAULT_PARAM_NAMES[i]))) - for key, value in comp_measurements.items(): - if key not in experiment.coordinates: - experiment.add_coordinate(key) - measurement = Measurement( - key, comp_callpath, metric, value) - experiment.add_measurement(measurement) - - for key, value in comm_measurements.items(): - if key not in experiment.coordinates: - experiment.add_coordinate(key) - measurement = Measurement( - key, com_callpath, metric, value) - experiment.add_measurement(measurement) + for metric in self.metrics: + for key, value in agg_measurements.items(): + if key not in experiment.coordinates: + experiment.add_coordinate(key) + measurement = Measurement( + key, aggregated_callpath, metric, value) + experiment.add_measurement(measurement) # create models using the new experiment for aggregated functions model_gen = ModelGenerator( @@ -1502,64 +1451,53 @@ def phase_statsframe( model_gen.model_all() experiment.add_modeler(model_gen) - # create new empty thicket dataframe - df = DataFrame() - - # create deepcopy of thicket - new_thicket = self.tht.deepcopy() - # x = new_thicket.statsframe.dataframe - # print(new_thicket) - # print(x) - - """d = {"node": [{"name": "communication", "type": "function"}, { - "name": "computation", "type": "function"}], "name": ["communication", "computation"]}""" - dx = pd.DataFrame({"name": ["computation"], "type": ["function"]}) - # dx = pd.DataFrame(data=x) - print(dx) - dy = pd.DataFrame({"name": ["communication"], "type": ["function"]}) - # dy = pd.DataFrame(data=y) - - d = {"node": [dx, dy], "name": ["communication", "computation"]} - df = pd.DataFrame(data=d) - print(df) - - """from collections import defaultdict - data_dict = defaultdict(list) - data_dict["node"].append(matches[0])""" - - import thicket.helpers as helpers - new_thicket.statsframe.dataframe = helpers._new_statsframe_df( - df) - print(new_thicket.statsframe.dataframe) - - # add the models in the new data frame - params = [str(p) for p in experiment.parameters] - for callpath in experiment.callpaths: - for metric in experiment.metrics: - mkey = (callpath, metric) - for thicket_node, _ in self.tht.dataframe.groupby(level=0): - if Callpath(thicket_node.frame["name"]) == callpath: - # catch key errors when queriying for models with a callpath, metric combination - # that does not exist because there was no measurement object created for them - try: - # TODO: need to be replaced with the new statsframe that I need to create - self.tht.statsframe.dataframe.at[ - thicket_node, str(metric) + MODEL_TAG - ] = ModelWrapper(model_gen.models[mkey], params) - # Add statistics to aggregated statistics table - if add_stats: - self._add_extrap_statistics( - thicket_node, str(metric)) - except Exception: - pass - - # TODO: how to return the data back, because pandas can't aggregate functions with each other, - # so there is no point in introducing an extra column type(MPI,comp) to group by that... + # create empty pandas dataframe with columns only + aggregated_df = pd.DataFrame(columns=["name"]) + for metric in self.metrics: + if add_stats is True: + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RSS_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_rRSS_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_SMAPE_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_AR2_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RE_extrap-model", None) + else: + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RSS_extrap-model", None) - d = {'col1': [1, 2], 'col2': [3, 4]} - df = pd.DataFrame(data=d) + for metric in self.metrics: + model = model_gen.models[(aggregated_callpath, metric)] + RSS = model.hypothesis._RSS + rRSS = model.hypothesis._rRSS + SMAPE = model.hypothesis._SMAPE + AR2 = model.hypothesis._AR2 + RE = model.hypothesis._RE + function = str(model.hypothesis.function) + if add_stats is True: + aggregated_df.loc[len(aggregated_df)] = [ + "aggregated_nodes", + function, + RSS, + rRSS, + SMAPE, + AR2, + RE, + ] + else: + aggregated_df.loc[len(aggregated_df)] = [ + "aggregated_nodes", + function, + ] - return df + return aggregated_df def multi_display_one_parameter_model(model_objects): @@ -1582,7 +1520,11 @@ def multi_display_two_parameter_model(model_objects): # chose the color map to take the colors from dynamically range_values = np.arange( 0, 1, 1 / len(model_objects)) - cmap = mpl.cm.get_cmap("brg") + if len(model_objects) <= 20: + colormap = "tab20" + else: + colormap = "Spectral" + cmap = mpl.cm.get_cmap(colormap) rgbas = [] for value in range_values: rgba = cmap(value) From a37ecd5a95de83e79b824bb218e616f469165556 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 19 Sep 2023 15:45:40 -0700 Subject: [PATCH 38/57] single-parameter support for mutli_display() --- thicket/model_extrap.py | 160 +++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 17 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 4f4fe1ee..a9e12bd9 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -20,6 +20,7 @@ import pandas as pd from pandas import DataFrame from scipy.stats import rankdata +from math import sqrt from hatchet import node @@ -77,6 +78,7 @@ def __init__(self, mdl: Model, parameters: list[str]) -> None: """ self.mdl = mdl self.parameters = parameters + self.default_param_names = DEFAULT_PARAM_NAMES def __str__(self) -> str: """Returns the Extra-P performance model function as a string. @@ -169,9 +171,9 @@ def convert_function_to_scientific_notation(self, model_function: Function) -> s scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") + "log2{"+str(DEFAULT_PARAM_NAMES[0])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[0])+")") scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + "log2{"+str(DEFAULT_PARAM_NAMES[1])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[1])+")") scientific_function = "$" + scientific_function + "$" return scientific_function @@ -298,7 +300,8 @@ def display_one_parameter_model( ) # plot axes and titles - ax.set_xlabel(self.parameters[0] + " $p$") + ax.set_xlabel(self.parameters[0] + " $" + + str(DEFAULT_PARAM_NAMES[0])+"$") ax.set_ylabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -560,8 +563,10 @@ def opt_scaling_func_user(p, q): ax.plot(line_x, line_y, line_z, color="black") # axis labels and title - ax.set_xlabel(self.parameters[0] + " $p$") - ax.set_ylabel(self.parameters[1] + " $q$") + ax.set_xlabel(self.parameters[0] + " $" + + str(DEFAULT_PARAM_NAMES[0])+"$") + ax.set_ylabel(self.parameters[1] + " $" + + str(DEFAULT_PARAM_NAMES[1])+"$") ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -1404,9 +1409,13 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D # aggregate measurements inside the extra-p models from all communication functions agg_measurements_list = [] + parameters = None for metric in self.metrics: agg_measurements = {} for i in range(len(callpaths)): + if parameters is None: + parameters = self.tht.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i].parameters measurement_list = self.tht.statsframe.dataframe[ str(metric)+"_extrap-model"].iloc[i].mdl.measurements for i in range(len(measurement_list)): @@ -1420,8 +1429,6 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D .coordinate] += measurement_list[i].median agg_measurements_list.append(agg_measurements) - print("agg_measurements:", agg_measurements) - # create a new Extra-P experiment, one for each phase model experiment = Experiment() @@ -1480,11 +1487,13 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D SMAPE = model.hypothesis._SMAPE AR2 = model.hypothesis._AR2 RE = model.hypothesis._RE - function = str(model.hypothesis.function) + # params = [str(i) for i in experiment.parameters] + mdl = ModelWrapper( + model_gen.models[(aggregated_callpath, metric)], parameters) if add_stats is True: aggregated_df.loc[len(aggregated_df)] = [ "aggregated_nodes", - function, + mdl, RSS, rRSS, SMAPE, @@ -1494,14 +1503,73 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D else: aggregated_df.loc[len(aggregated_df)] = [ "aggregated_nodes", - function, + mdl, ] return aggregated_df def multi_display_one_parameter_model(model_objects): - pass + + functions = [] + for model_object in model_objects: + functions.append(model_object.mdl.hypothesis.function) + + # create scientific representation of created performance models + scientific_functions = convert_functions_to_scientific_notations( + functions + ) + + # sort based on x values + measures_sorted = sorted( + model_objects[0].mdl.measurements, key=lambda x: x.coordinate[0]) + + # compute means, medians, mins, maxes + params = [ms.coordinate[0] for ms in measures_sorted] # X values + + # x value plotting range, dynamic based off what the largest/smallest values are + x_vals = np.arange( + params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 + ) + + y_vals_list = [] + for model_object in model_objects: + # compute y values for plotting + y_vals = [model_object.mdl.hypothesis.function.evaluate( + x) for x in x_vals] + y_vals_list.append(y_vals) + + plt.ioff() + fig, ax = plt.subplots() + + range_values = np.arange( + 0, 1, 1 / len(model_objects)) + if len(model_objects) <= 20: + colormap = "tab20" + else: + colormap = "Spectral" + cmap = mpl.cm.get_cmap(colormap) + rgbas = [] + for value in range_values: + rgba = cmap(value) + rgbas.append(rgba) + + # plot the model + for i in range(len(model_objects)): + ax.plot(x_vals, y_vals_list[i], + label=str(model_objects[i].mdl.callpath) + + ": "+scientific_functions[i], + color=rgbas[i]) + + # plot axes and titles + ax.set_xlabel(model_objects[0].parameters[0] + " $" + + str(DEFAULT_PARAM_NAMES[0])+"$") + ax.set_ylabel(model_objects[0].mdl.metric) + + # plot legend + ax.legend(loc=1) + + return fig, ax def multi_display_two_parameter_model(model_objects): @@ -1529,6 +1597,41 @@ def multi_display_two_parameter_model(model_objects): for value in range_values: rgba = cmap(value) rgbas.append(rgba) + sorted_colors = {} + for rgba in rgbas: + luminance = sqrt(0.299*rgba[0]**2 + 0.587 * + rgba[1]**2 + 0.114*rgba[2]**2) + sorted_colors[luminance] = rgba + sorted_colors_keys = list(sorted_colors.keys()) + sorted_colors_keys.sort() + sorted_colors = {i: sorted_colors[i] for i in sorted_colors_keys} + rgbas = [] + for _, value in sorted_colors.items(): + x = (value[0], value[1], value[2]) + rgbas.append(x) + + # Model object hypothesis function + measures = model_objects[0].mdl.measurements + xmax = None + ymax = None + for measure in measures: + x = measure.coordinate._values[0] + y = measure.coordinate._values[1] + if xmax is None: + xmax = x + else: + if x > xmax: + xmax = x + if ymax is None: + ymax = y + else: + if y > ymax: + ymax = y + eval_results = {} + for function in functions: + result = function.evaluate((xmax, ymax)) + eval_results[result] = function + print("DEBUG eval_results:", eval_results) # create dict for legend color and markers dict_callpath_color = {} @@ -1546,6 +1649,26 @@ def multi_display_two_parameter_model(model_objects): fig = plt.figure() ax = fig.add_subplot(projection='3d') + sorted_model_object = {} + for model_object in model_objects: + result = None + for key, value in eval_results.items(): + if value == model_object.mdl.hypothesis.function: + result = key + break + sorted_model_object[result] = model_object + + sorted_model_object_keys = list(sorted_model_object.keys()) + sorted_model_object_keys.sort() + sorted_model_object = { + i: sorted_model_object[i] for i in sorted_model_object_keys} + + print("DEBUG sorted_model_object:", sorted_model_object) + + model_objects = [] + for _, value in sorted_model_object.items(): + model_objects.append(value) + # sort based on x and y values measures_sorted = sorted( model_objects[0].mdl.measurements, key=lambda x: ( @@ -1558,17 +1681,20 @@ def multi_display_two_parameter_model(model_objects): X, Y, Z_List, z_List = calculate_z_models( maxX, maxY, model_objects, parameters) + temp = [1, 2, 0] for i in range(len(Z_List)): ax.plot_surface( - X, Y, Z_List[i], label="optimal scaling", + X, Y, Z_List[i], rstride=1, cstride=1, antialiased=False, - alpha=0.3, color=rgbas[i]) + alpha=0.3, color=rgbas[temp[i]]) # axis labels and title - ax.set_xlabel(model_objects[0].parameters[0] + " $p$") - ax.set_ylabel(model_objects[0].parameters[1] + " $q$") + ax.set_xlabel(model_objects[0].parameters[0] + + " $"+str(DEFAULT_PARAM_NAMES[0])+"$") + ax.set_ylabel(model_objects[0].parameters[1] + + " $"+str(DEFAULT_PARAM_NAMES[1])+"$") ax.set_zlabel(model_objects[0].mdl.metric) # draw the legend @@ -1700,9 +1826,9 @@ def convert_functions_to_scientific_notations(model_functions: list[Function]) - scientific_function = scientific_function.replace("(", "{") scientific_function = scientific_function.replace(")", "}") scientific_function = scientific_function.replace( - "log2{p}", "\\log_2(p)") + "log2{"+str(DEFAULT_PARAM_NAMES[0])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[0])+")") scientific_function = scientific_function.replace( - "log2{q}", "\\log_2(q)") + "log2{"+str(DEFAULT_PARAM_NAMES[1])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[1])+")") scientific_function = "$" + scientific_function + "$" scientific_functions.append(scientific_function) return scientific_functions From d33ce04799766a07d060dc8b2fc7f325ba64e60b Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 19 Sep 2023 16:01:05 -0700 Subject: [PATCH 39/57] produce_aggregated_model() multi metric support bug fix --- thicket/model_extrap.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index a9e12bd9..ed2dd6c3 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1422,11 +1422,19 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D measurement_list[i].coordinate measurement_list[i].median if measurement_list[i].coordinate not in agg_measurements: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].mean else: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].mean agg_measurements_list.append(agg_measurements) # create a new Extra-P experiment, one for each phase model @@ -1480,6 +1488,7 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D aggregated_df.insert(len(aggregated_df.columns), str(metric)+"_RSS_extrap-model", None) + new_row = ["aggregated_nodes"] for metric in self.metrics: model = model_gen.models[(aggregated_callpath, metric)] RSS = model.hypothesis._RSS @@ -1487,25 +1496,16 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D SMAPE = model.hypothesis._SMAPE AR2 = model.hypothesis._AR2 RE = model.hypothesis._RE - # params = [str(i) for i in experiment.parameters] mdl = ModelWrapper( model_gen.models[(aggregated_callpath, metric)], parameters) if add_stats is True: - aggregated_df.loc[len(aggregated_df)] = [ - "aggregated_nodes", - mdl, - RSS, - rRSS, - SMAPE, - AR2, - RE, - ] - else: - aggregated_df.loc[len(aggregated_df)] = [ - "aggregated_nodes", - mdl, - ] - + new_row.append(mdl) + new_row.append(RSS) + new_row.append(rRSS) + new_row.append(SMAPE) + new_row.append(AR2) + new_row.append(RE) + aggregated_df.loc[len(aggregated_df)] = new_row return aggregated_df From 2f9e5f459739775c314b44a6a0867e1079e22a88 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 19 Sep 2023 16:38:40 -0700 Subject: [PATCH 40/57] multi_display() color chosen by dominant scaling --- thicket/model_extrap.py | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index ed2dd6c3..75ae7088 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1628,10 +1628,10 @@ def multi_display_two_parameter_model(model_objects): if y > ymax: ymax = y eval_results = {} - for function in functions: - result = function.evaluate((xmax, ymax)) - eval_results[result] = function - print("DEBUG eval_results:", eval_results) + for model_object in model_objects: + function = model_object.mdl.hypothesis.function + result = function.evaluate((xmax*1.5, ymax*1.5)) + eval_results[result] = (function, model_object) # create dict for legend color and markers dict_callpath_color = {} @@ -1649,25 +1649,14 @@ def multi_display_two_parameter_model(model_objects): fig = plt.figure() ax = fig.add_subplot(projection='3d') - sorted_model_object = {} - for model_object in model_objects: - result = None - for key, value in eval_results.items(): - if value == model_object.mdl.hypothesis.function: - result = key - break - sorted_model_object[result] = model_object - - sorted_model_object_keys = list(sorted_model_object.keys()) - sorted_model_object_keys.sort() - sorted_model_object = { - i: sorted_model_object[i] for i in sorted_model_object_keys} - - print("DEBUG sorted_model_object:", sorted_model_object) + sorted_eval_results_keys = list(eval_results.keys()) + sorted_eval_results_keys.sort() + eval_results = { + i: eval_results[i] for i in sorted_eval_results_keys} model_objects = [] - for _, value in sorted_model_object.items(): - model_objects.append(value) + for _, value in eval_results.items(): + model_objects.append(value[1]) # sort based on x and y values measures_sorted = sorted( @@ -1681,14 +1670,13 @@ def multi_display_two_parameter_model(model_objects): X, Y, Z_List, z_List = calculate_z_models( maxX, maxY, model_objects, parameters) - temp = [1, 2, 0] for i in range(len(Z_List)): ax.plot_surface( X, Y, Z_List[i], rstride=1, cstride=1, antialiased=False, - alpha=0.3, color=rgbas[temp[i]]) + alpha=0.3, color=rgbas[i]) # axis labels and title ax.set_xlabel(model_objects[0].parameters[0] + From ef6b94f722e09fd27e5f1c14baa2fa969199c734 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 5 Oct 2023 15:17:39 -0700 Subject: [PATCH 41/57] refactored extrap modeling code --- thicket/ensemble.py | 47 +++-- thicket/model_extrap.py | 379 ++++++++++++---------------------------- 2 files changed, 142 insertions(+), 284 deletions(-) diff --git a/thicket/ensemble.py b/thicket/ensemble.py index 62f38e5a..149e3686 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -76,19 +76,23 @@ def _check_structures(): """Check that the structures of the thicket objects are valid for the incoming operations.""" # Required/expected format of the data for th in thickets: - verify_thicket_structures(th.dataframe, index=["node", "profile"]) - verify_thicket_structures(th.statsframe.dataframe, index=["node"]) + verify_thicket_structures( + th.dataframe, index=["node", "profile"]) + verify_thicket_structures( + th.statsframe.dataframe, index=["node"]) verify_thicket_structures(th.metadata, index=["profile"]) # Check for metadata_key in metadata if metadata_key: for th in thickets: - verify_thicket_structures(th.metadata, columns=[metadata_key]) + verify_thicket_structures( + th.metadata, columns=[metadata_key]) # Check length of profiles match for i in range(len(thickets) - 1): if len(thickets[i].profile) != len(thickets[i + 1].profile): raise ValueError( "Length of all thicket profiles must match. {} != {}".format( - len(thickets[i].profile), len(thickets[i + 1].profile) + len(thickets[i].profile), len( + thickets[i + 1].profile) ) ) # Ensure all thickets profiles are sorted. Must be true when metadata_key=None to @@ -123,16 +127,19 @@ def _handle_metadata(): thickets_cp[i].metadata.reset_index(drop=True, inplace=True) if metadata_key is None: for i in range(len(thickets_cp)): - thickets_cp[i].metadata.index.set_names("profile", inplace=True) + thickets_cp[i].metadata.index.set_names( + "profile", inplace=True) else: for i in range(len(thickets_cp)): - thickets_cp[i].metadata.set_index(metadata_key, inplace=True) + thickets_cp[i].metadata.set_index( + metadata_key, inplace=True) thickets_cp[i].metadata.sort_index(inplace=True) # Create multi-index columns for i in range(len(thickets_cp)): thickets_cp[i].metadata.columns = pd.MultiIndex.from_tuples( - _create_multiindex_columns(thickets_cp[i].metadata, headers[i]) + _create_multiindex_columns( + thickets_cp[i].metadata, headers[i]) ) # Concat metadata together @@ -144,11 +151,13 @@ def _handle_metadata(): def _handle_misc(): """Misceallaneous Thicket object operations.""" for i in range(1, len(thickets_cp)): - combined_th.profile += thickets_cp[i].profile # Update "profile" object + # Update "profile" object + combined_th.profile += thickets_cp[i].profile combined_th.profile_mapping.update( thickets_cp[i].profile_mapping ) # Update "profile_mapping" object - combined_th.profile = [new_mappings[prf] for prf in combined_th.profile] + combined_th.profile = [new_mappings[prf] + for prf in combined_th.profile] profile_mapping_cp = combined_th.profile_mapping.copy() for k, v in profile_mapping_cp.items(): combined_th.profile_mapping[ @@ -178,7 +187,8 @@ def _handle_perfdata(): thickets_cp[i].add_column_from_metadata_to_ensemble( "new_profiles", drop=True ) - thickets_cp[i].dataframe.reset_index(level="profile", inplace=True) + thickets_cp[i].dataframe.reset_index( + level="profile", inplace=True) new_mappings.update( pd.Series( thickets_cp[i] @@ -188,7 +198,8 @@ def _handle_perfdata(): index=thickets_cp[i].dataframe["profile"], ).to_dict() ) - thickets_cp[i].dataframe.drop("profile", axis=1, inplace=True) + thickets_cp[i].dataframe.drop( + "profile", axis=1, inplace=True) thickets_cp[i].dataframe.set_index( "new_profiles", append=True, inplace=True ) @@ -197,8 +208,10 @@ def _handle_perfdata(): ) else: # Change second-level index to be from metadata's "metadata_key" column for i in range(len(thickets_cp)): - thickets_cp[i].add_column_from_metadata_to_ensemble(metadata_key) - thickets_cp[i].dataframe.reset_index(level="profile", inplace=True) + thickets_cp[i].add_column_from_metadata_to_ensemble( + metadata_key) + thickets_cp[i].dataframe.reset_index( + level="profile", inplace=True) new_mappings.update( pd.Series( thickets_cp[i] @@ -208,7 +221,8 @@ def _handle_perfdata(): index=thickets_cp[i].dataframe["profile"], ).to_dict() ) - thickets_cp[i].dataframe.drop("profile", axis=1, inplace=True) + thickets_cp[i].dataframe.drop( + "profile", axis=1, inplace=True) thickets_cp[i].dataframe.set_index( metadata_key, append=True, inplace=True ) @@ -367,10 +381,11 @@ def _agg_to_set(obj): # Extend dataframe unify_df = pd.concat([th.dataframe, unify_df]) # Sort by keys - unify_profile_mapping = OrderedDict(sorted(unify_profile_mapping.items())) + unify_profile_mapping = OrderedDict( + sorted(unify_profile_mapping.items())) # Insert missing rows in dataframe - unify_df = _fill_perfdata(unify_df) + # unify_df = _fill_perfdata(unify_df) # Metadata-specific operations if superthicket: diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 75ae7088..5c5c835f 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -21,6 +21,7 @@ from pandas import DataFrame from scipy.stats import rankdata from math import sqrt +import math from hatchet import node @@ -99,84 +100,6 @@ def eval(self, val: float) -> float: """ return self.mdl.hypothesis.function.evaluate(val) - def convert_coefficient_to_scientific_notation(self, coefficient: float) -> str: - """This function converts an Extra-P model coefficient into scientific - notation and returns it as a string. It also shortes the coefficients - to three decimal places. - - Args: - coefficient (float): A model coefficient from a Extra-P function. - - Returns: - str: The coefficient in scientific notation. - """ - f = mticker.ScalarFormatter(useMathText=True) - f.set_powerlimits((-3, 3)) - x = "{}".format(f.format_data(float(coefficient))) - terms = x.split(" ") - if not terms[0][:1].isnumeric(): - coeff = terms[0][1:] - try: - coeff = "{:.3f}".format(float(coeff)) - except ValueError: - pass - new_coeff = "" - new_coeff += "-" - new_coeff += coeff - for i in range(len(terms)): - if i != 0: - new_coeff += terms[i] - return new_coeff - else: - coeff = terms[0] - try: - coeff = "{:.3f}".format(float(coeff)) - except ValueError: - pass - new_coeff = "" - new_coeff += coeff - for i in range(len(terms)): - if i != 0: - new_coeff += terms[i] - return new_coeff - - def convert_function_to_scientific_notation(self, model_function: Function) -> str: - """This function converts the created performance model function into a - scientific notation in string format. - - Args: - model_function (Extra-P Model): The Extra-P Model object containing the scaling function. - - Returns: - str: The resulting scientific version of the performance function. - """ - - function_terms = len(model_function.compound_terms) - model_copy = copy.deepcopy(model_function) - model_copy.constant_coefficient = ( - self.convert_coefficient_to_scientific_notation( - model_function.constant_coefficient - ) - ) - for i in range(function_terms): - model_copy.compound_terms[ - i - ].coefficient = self.convert_coefficient_to_scientific_notation( - model_function.compound_terms[i].coefficient - ) - scientific_function = str(model_copy) - scientific_function = scientific_function.replace("+-", "-") - scientific_function = scientific_function.replace("+ -", "-") - scientific_function = scientific_function.replace("*", "\\cdot") - scientific_function = scientific_function.replace("(", "{") - scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{"+str(DEFAULT_PARAM_NAMES[0])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[0])+")") - scientific_function = scientific_function.replace( - "log2{"+str(DEFAULT_PARAM_NAMES[1])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[1])+")") - scientific_function = "$" + scientific_function + "$" - return scientific_function - def display_one_parameter_model( self, show_mean: bool = False, @@ -221,10 +144,8 @@ def display_one_parameter_model( params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 ) - # create a scientific representation of the created performance model - scientific_function = self.convert_function_to_scientific_notation( - self.mdl.hypothesis.function - ) + scientific_function = self.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0])) # compute y values for plotting y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] @@ -571,9 +492,8 @@ def opt_scaling_func_user(p, q): ax.set_title(str(self.mdl.callpath) + "()") # create scientific representation of create performance model - scientific_function = self.convert_function_to_scientific_notation( - self.mdl.hypothesis.function - ) + scientific_function = self.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1])) # create dict for legend color and markers dict_callpath_color = {} @@ -615,7 +535,7 @@ def opt_scaling_func_user(p, q): # draw the legend self.draw_legend(ax, dict_callpath_color, - len(str(scientific_function))) + len(scientific_function)) return fig, ax @@ -1079,32 +999,33 @@ def _componentize_function( # Dictionary of variables mapped to coefficients term_dict = {} # Model object hypothesis function - fnc = model_object.mdl.hypothesis.function - # Constant "c" column - term_dict["c"] = fnc.constant_coefficient - - # Terms of form "coefficient * variables" - for term in fnc.compound_terms: - if len(parameters) == 1: - # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) - - term_dict[variable_column] = term.coefficient - else: - x = term.parameter_term_pairs - term_str = "" - for i in range(len(x)): - # [0] is the x mpterm - # [1] is the term object - term_parameter_str = DEFAULT_PARAM_NAMES[x[i][0]] - y = x[i][1].to_string(parameter=term_parameter_str) - if i == 0: - term_str += y - else: - term_str = term_str + " * " + y + if not isinstance(model_object, float): + fnc = model_object.mdl.hypothesis.function + # Constant "c" column + term_dict["c"] = fnc.constant_coefficient + + # Terms of form "coefficient * variables" + for term in fnc.compound_terms: + if len(parameters) == 1: + # Join variables of the same term together + variable_column = " * ".join(t.to_string() + for t in term.simple_terms) + + term_dict[variable_column] = term.coefficient + else: + x = term.parameter_term_pairs + term_str = "" + for i in range(len(x)): + # [0] is the x mpterm + # [1] is the term object + term_parameter_str = DEFAULT_PARAM_NAMES[x[i][0]] + y = x[i][1].to_string(parameter=term_parameter_str) + if i == 0: + term_str += y + else: + term_str = term_str + " * " + y - term_dict[term_str] = term.coefficient + term_dict[term_str] = term.coefficient return term_dict @@ -1176,87 +1097,90 @@ def _analyze_complexity( """ # Model object hypothesis function - fnc = model_object.mdl.hypothesis.function - complexity_class = "" - coefficient = 0 - return_value = {} - term_values = [] - terms = [] - - target_str = "(" - for param_value in eval_target: - target_str += str(param_value) - target_str += "," - target_str = target_str[:-1] - target_str += ")" - - if len(fnc.compound_terms) == 0: - complexity_class = "1" - coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + target_str] = complexity_class - return_value[col + "_coefficient_" + target_str] = coefficient + if not isinstance(model_object, float): + fnc = model_object.mdl.hypothesis.function + complexity_class = "" + coefficient = 0 + + term_values = [] + terms = [] + + target_str = "(" + for param_value in eval_target: + target_str += str(param_value) + target_str += "," + target_str = target_str[:-1] + target_str += ")" + + if len(fnc.compound_terms) == 0: + complexity_class = "1" + coefficient = fnc.constant_coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient - else: - if len(parameters) == 1: - for term in fnc.compound_terms: - result = term.evaluate(eval_target[0]) - term_values.append(result) - terms.append(term) else: - for term in fnc.compound_terms: - result = term.evaluate(eval_target) - term_values.append(result) - terms.append(term) - - max_index = term_values.index(max(term_values)) - - if max(term_values) > fnc.constant_coefficient: - comp = "" if len(parameters) == 1: - for simple_term in terms[max_index].simple_terms: - if comp == "": - comp += simple_term.to_string() - else: - comp = comp + "*" + simple_term.to_string() - comp = comp.replace("^", "**") - complexity_class = "" + comp + "" - coefficient = terms[max_index].coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = coefficient + for term in fnc.compound_terms: + result = term.evaluate(eval_target[0]) + term_values.append(result) + terms.append(term) else: + for term in fnc.compound_terms: + result = term.evaluate(eval_target) + term_values.append(result) + terms.append(term) + + max_index = term_values.index(max(term_values)) + + if max(term_values) > fnc.constant_coefficient: comp = "" - for parameter_term_pair in terms[max_index].parameter_term_pairs: - # [0] to get the index of the paramete - term_parameter_str = DEFAULT_PARAM_NAMES[parameter_term_pair[0]] - # [1] to get the term - if comp == "": - comp += parameter_term_pair[1].to_string( - parameter=term_parameter_str - ) - else: - comp = ( - comp - + "*" - + parameter_term_pair[1].to_string( + if len(parameters) == 1: + for simple_term in terms[max_index].simple_terms: + if comp == "": + comp += simple_term.to_string() + else: + comp = comp + "*" + simple_term.to_string() + comp = comp.replace("^", "**") + complexity_class = "" + comp + "" + coefficient = terms[max_index].coefficient + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = coefficient + else: + comp = "" + for parameter_term_pair in terms[max_index].parameter_term_pairs: + # [0] to get the index of the paramete + term_parameter_str = DEFAULT_PARAM_NAMES[parameter_term_pair[0]] + # [1] to get the term + if comp == "": + comp += parameter_term_pair[1].to_string( parameter=term_parameter_str ) - ) - comp = comp.replace("^", "**") - complexity_class = "" + comp + "" + else: + comp = ( + comp + + "*" + + parameter_term_pair[1].to_string( + parameter=term_parameter_str + ) + ) + comp = comp.replace("^", "**") + complexity_class = "" + comp + "" + return_value[col + "_complexity_" + + target_str] = complexity_class + return_value[col + "_coefficient_" + + target_str] = term.coefficient + + else: + complexity_class = "1" + coefficient = fnc.constant_coefficient return_value[col + "_complexity_" + target_str] = complexity_class return_value[col + "_coefficient_" + - target_str] = term.coefficient - - else: - complexity_class = "1" - coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + target_str] = coefficient + target_str] = coefficient return return_value @@ -1359,9 +1283,12 @@ def complexity_statsframe( total_metric_value = 0 metric_values = [] for model_obj in self.tht.statsframe.dataframe[col]: - metric_value = model_obj.mdl.hypothesis.function.evaluate( - target - ) + if not isinstance(model_obj, float): + metric_value = model_obj.mdl.hypothesis.function.evaluate( + target + ) + else: + metric_value = math.nan total_metric_value += metric_value metric_values.append(metric_value) percentages = [] @@ -1778,87 +1705,3 @@ def calculate_z_optimized(X, Y, function, parameters, maxX, maxY): z_value = function.evaluate(points) return z_value - - -def convert_functions_to_scientific_notations(model_functions: list[Function]) -> list[str]: - """This function converts the created performance model function into a - scientific notation in string format. - - Args: - list[model_function] (Extra-P Model): The Extra-P Model object list containing the scaling functions. - - Returns: - list[str]: The resulting scientific version of the performance functions in a list. - """ - - scientific_functions = [] - for model_function in model_functions: - - function_terms = len(model_function.compound_terms) - model_copy = copy.deepcopy(model_function) - model_copy.constant_coefficient = ( - convert_coefficient_to_science_notation( - model_function.constant_coefficient - ) - ) - for i in range(function_terms): - model_copy.compound_terms[ - i - ].coefficient = convert_coefficient_to_science_notation( - model_function.compound_terms[i].coefficient - ) - scientific_function = str(model_copy) - scientific_function = scientific_function.replace("+-", "-") - scientific_function = scientific_function.replace("+ -", "-") - scientific_function = scientific_function.replace("*", "\\cdot") - scientific_function = scientific_function.replace("(", "{") - scientific_function = scientific_function.replace(")", "}") - scientific_function = scientific_function.replace( - "log2{"+str(DEFAULT_PARAM_NAMES[0])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[0])+")") - scientific_function = scientific_function.replace( - "log2{"+str(DEFAULT_PARAM_NAMES[1])+"}", "\\log_2("+str(DEFAULT_PARAM_NAMES[1])+")") - scientific_function = "$" + scientific_function + "$" - scientific_functions.append(scientific_function) - return scientific_functions - - -def convert_coefficient_to_science_notation(coefficient: float) -> str: - """This function converts an Extra-P model coefficient into scientific - notation and returns it as a string. It also shortes the coefficients - to three decimal places. - - Args: - coefficient (float): A model coefficient from a Extra-P function. - - Returns: - str: The coefficient in scientific notation. - """ - f = mticker.ScalarFormatter(useMathText=True) - f.set_powerlimits((-3, 3)) - x = "{}".format(f.format_data(float(coefficient))) - terms = x.split(" ") - if not terms[0][:1].isnumeric(): - coeff = terms[0][1:] - try: - coeff = "{:.3f}".format(float(coeff)) - except ValueError: - pass - new_coeff = "" - new_coeff += "-" - new_coeff += coeff - for i in range(len(terms)): - if i != 0: - new_coeff += terms[i] - return new_coeff - else: - coeff = terms[0] - try: - coeff = "{:.3f}".format(float(coeff)) - except ValueError: - pass - new_coeff = "" - new_coeff += coeff - for i in range(len(terms)): - if i != 0: - new_coeff += terms[i] - return new_coeff From 878c5aae0a6ec051efb1ce01312c5acfedf326a6 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Tue, 10 Oct 2023 17:52:21 -0700 Subject: [PATCH 42/57] extrap models shown in latex encoding in dataframe, bugfixes --- thicket/model_extrap.py | 64 ++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 5c5c835f..21146196 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -79,7 +79,6 @@ def __init__(self, mdl: Model, parameters: list[str]) -> None: """ self.mdl = mdl self.parameters = parameters - self.default_param_names = DEFAULT_PARAM_NAMES def __str__(self) -> str: """Returns the Extra-P performance model function as a string. @@ -87,7 +86,14 @@ def __str__(self) -> str: Returns: str: The Extra-P performance model function. """ - return str(self.mdl.hypothesis.function) + if len(self.parameters) == 1: + return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0])) + elif len(self.parameters) == 2: + return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1])) + elif len(self.parameters) == 3: + return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]), Parameter(DEFAULT_PARAM_NAMES[2])) + else: + return 1 def eval(self, val: float) -> float: """Evaluates the performance model function using a given value and returns the result. @@ -1343,25 +1349,27 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D if parameters is None: parameters = self.tht.statsframe.dataframe[ str(metric)+"_extrap-model"].iloc[i].parameters - measurement_list = self.tht.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i].mdl.measurements - for i in range(len(measurement_list)): - measurement_list[i].coordinate - measurement_list[i].median - if measurement_list[i].coordinate not in agg_measurements: - if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median - else: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].mean - else: - if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median + if not isinstance(self.tht.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i], float): + measurement_list = self.tht.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i].mdl.measurements + for i in range(len(measurement_list)): + measurement_list[i].coordinate + measurement_list[i].median + if measurement_list[i].coordinate not in agg_measurements: + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].mean else: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].mean + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].mean agg_measurements_list.append(agg_measurements) # create a new Extra-P experiment, one for each phase model @@ -1439,13 +1447,11 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D def multi_display_one_parameter_model(model_objects): functions = [] + scientific_functions = [] for model_object in model_objects: functions.append(model_object.mdl.hypothesis.function) - - # create scientific representation of created performance models - scientific_functions = convert_functions_to_scientific_notations( - functions - ) + scientific_functions.append( + model_object.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]))) # sort based on x values measures_sorted = sorted( @@ -1504,13 +1510,11 @@ def multi_display_two_parameter_model(model_objects): parameters = model_objects[0].parameters functions = [] + scientific_functions = [] for model_object in model_objects: functions.append(model_object.mdl.hypothesis.function) - - # create scientific representation of created performance models - scientific_functions = convert_functions_to_scientific_notations( - functions - ) + scientific_functions.append( + model_object.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]))) # chose the color map to take the colors from dynamically range_values = np.arange( From 192ad9d2926b3cee8df266ff259fdd292dcb198c Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 20 Oct 2023 15:28:06 -0700 Subject: [PATCH 43/57] extrap modeler configurtion support and multi column index for modeling output in thicket.statsframe.dataframe --- thicket/model_extrap.py | 419 ++++++++++++++++++++++++++-------------- 1 file changed, 274 insertions(+), 145 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 21146196..9441a8c5 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -6,7 +6,7 @@ import base64 import copy from io import BytesIO - +from itertools import chain from typing import Tuple import matplotlib as mpl @@ -24,9 +24,14 @@ import math from hatchet import node +from thicket.thicket import Thicket from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator +from extrap.modelers.multi_parameter.multi_parameter_modeler import MultiParameterModeler +from extrap.util.options_parser import SINGLE_PARAMETER_MODELER_KEY, SINGLE_PARAMETER_OPTIONS_KEY +from extrap.modelers import single_parameter +from extrap.modelers import multi_parameter from extrap.entities.experiment import Experiment from extrap.entities.parameter import Parameter from extrap.fileio.io_helper import create_call_tree @@ -42,6 +47,20 @@ MODEL_TAG = "_extrap-model" +class ModelerOptions(): + """ + """ + + def __init__(self, allow_log_terms: bool = True, + use_crossvalidation: bool = True, + compare_with_RSS: bool = False): + """ + """ + self.allow_log_terms = allow_log_terms + self.use_crossvalidation = use_crossvalidation + self.compare_with_RSS = compare_with_RSS + + class ExtrapReaderException(Exception): """Custom exception class for raising exceptions while reading in data from a pandas type dataframe from a thicket object into the Extra-P experiment object. @@ -70,7 +89,7 @@ class ModelWrapper: the model. """ - def __init__(self, mdl: Model, parameters: list[str]) -> None: + def __init__(self, mdl: Model, parameters: list[str], name: str) -> None: """Init function of the ModelWrapper class. Args: @@ -79,6 +98,8 @@ def __init__(self, mdl: Model, parameters: list[str]) -> None: """ self.mdl = mdl self.parameters = parameters + self.default_param_names = DEFAULT_PARAM_NAMES + self.name = name def __str__(self) -> str: """Returns the Extra-P performance model function as a string. @@ -87,13 +108,23 @@ def __str__(self) -> str: str: The Extra-P performance model function. """ if len(self.parameters) == 1: - return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0])) + return self.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]) + ) elif len(self.parameters) == 2: - return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1])) + return self.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]), + Parameter(DEFAULT_PARAM_NAMES[1]) + ) elif len(self.parameters) == 3: - return self.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]), Parameter(DEFAULT_PARAM_NAMES[2])) + return self.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]), + Parameter(DEFAULT_PARAM_NAMES[1]), + Parameter(DEFAULT_PARAM_NAMES[2]) + ) else: return 1 + # return str(self.mdl.hypothesis.function) def eval(self, val: float) -> float: """Evaluates the performance model function using a given value and returns the result. @@ -215,7 +246,7 @@ def display_one_parameter_model( ) # otherwise try to figure out the optimal scaling curve automatically else: - if self.parameters[0] == "jobsize": + if parameters[0] == "jobsize": y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) @@ -227,7 +258,7 @@ def display_one_parameter_model( ) # plot axes and titles - ax.set_xlabel(self.parameters[0] + " $" + + ax.set_xlabel(parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0])+"$") ax.set_ylabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -421,8 +452,8 @@ def opt_scaling_func_user(p, q): # otherwise try to figure out the optimal scaling curve automatically else: if ( - self.parameters[0] == "jobsize" - and self.parameters[1] == "problem_size" + parameters[0] == "jobsize" + and parameters[1] == "problem_size" ): z_vals_opt = opt_scaling_func_auto(x_vals, y_vals) ax.plot_surface( @@ -490,9 +521,9 @@ def opt_scaling_func_user(p, q): ax.plot(line_x, line_y, line_z, color="black") # axis labels and title - ax.set_xlabel(self.parameters[0] + " $" + + ax.set_xlabel(parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0])+"$") - ax.set_ylabel(self.parameters[1] + " $" + + ax.set_ylabel(parameters[1] + " $" + str(DEFAULT_PARAM_NAMES[1])+"$") ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -576,7 +607,7 @@ def display( """ # check number of model parameters - if len(self.parameters) == 1: + if len(parameters) == 1: fig, ax = self.display_one_parameter_model( show_mean, show_median, @@ -587,7 +618,7 @@ def display( opt_scaling_func, ) - elif len(self.parameters) == 2: + elif len(parameters) == 2: fig, ax = self.display_two_parameter_model( show_mean, show_median, @@ -601,49 +632,21 @@ def display( else: raise Exception( "Plotting performance models with " - + str(len(self.parameters)) + + str(len(parameters)) + " parameters is currently not supported." ) return fig, ax -class Modeling: - """Produce models for all the metrics across the given graphframes.""" - - def __init__( - self, tht, parameters: list[str] = None, metrics: list[str] = None - ) -> None: - """Create a new model object. - - Adds a model column for each metric for each common frame across all the - graphframes. - - The given list of params contains the parameters to build the models. For - example, MPI ranks, input sizes, and so on. - - Arguments: - tht (Thicket): thicket object - parameters (list): A list of String values of the parameters that will be considered for - modeling by Extra-P. - metrics (list): A list of String value of the metrics Extra-P will create models for. - """ - self.tht = tht - - # if there were no parameters provided use the jobsize to create models, - # which should always be available - if not parameters: - self.parameters = ["jobsize"] - else: - self.parameters = parameters +class ExtrapInterface: + """A class that functions as an interface between Thicket and Extra-P + to load the data from a thicket into Extra-P, create performance models, + append them to a thicket, and display the models.""" - # if no metrics have been provided create models for all existing metrics - if not metrics: - self.metrics = self.tht.exc_metrics + self.tht.inc_metrics - else: - self.metrics = metrics - - self.experiment = None + def __init__(self) -> None: + """Create a new Extra-P Interface object.""" + pass def to_html( self, @@ -713,82 +716,119 @@ def model_to_img_html(model_obj: Model) -> str: met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected - return self.tht.statsframe.dataframe[ + return thicket.statsframe.dataframe[ [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) - def _add_extrap_statistics(self, node: node, metric: str) -> None: + def _add_extrap_statistics(self, tht: Thicket, node: node, metric: str) -> None: """Insert the Extra-P hypothesis function statistics into the aggregated - statistics table. Has to be called after "produce_models". + statistics table. Has to be called after "create_models". Arguments: node (hatchet.node): The node for which statistics should be calculated metric (str): The metric for which statistics should be calculated """ - hypothesis_fn = self.tht.statsframe.dataframe.at[ + hypothesis_fn = tht.statsframe.dataframe.at[ node, metric + MODEL_TAG ].mdl.hypothesis - self.tht.statsframe.dataframe.at[ + tht.statsframe.dataframe.at[ node, metric + "_RSS" + MODEL_TAG ] = hypothesis_fn.RSS - self.tht.statsframe.dataframe.at[ + tht.statsframe.dataframe.at[ node, metric + "_rRSS" + MODEL_TAG ] = hypothesis_fn.rRSS - self.tht.statsframe.dataframe.at[ + tht.statsframe.dataframe.at[ node, metric + "_SMAPE" + MODEL_TAG ] = hypothesis_fn.SMAPE - self.tht.statsframe.dataframe.at[ + tht.statsframe.dataframe.at[ node, metric + "_AR2" + MODEL_TAG ] = hypothesis_fn.AR2 - self.tht.statsframe.dataframe.at[ + tht.statsframe.dataframe.at[ node, metric + "_RE" + MODEL_TAG ] = hypothesis_fn.RE - def produce_models( - self, - use_median: bool = True, - calc_total_metrics: bool = False, - scaling_parameter: str = "jobsize", - add_stats: bool = True, - ) -> None: - """Produces an Extra-P model. Models are generated by calling Extra-P's - ModelGenerator. + def create_models(self, + tht: Thicket, + parameters: list[str] = None, + metrics: list[str] = None, + use_median: bool = True, + calc_total_metrics: bool = False, + scaling_parameter: str = "jobsize", + add_stats: bool = True, + modeler: str = "default", + model_name: str = "default_model", + allow_log_terms: bool = True + ) -> None: + """Converts the data in the given thicket into a format that + can be read by Extra-P. Then the Extra-P modeler is called + with the given options and creates a performance model for + each callpath/node considering the given metrics, and model + parameters. The resulting models will be appended to the given + thicket's graphframe. Arguments: - use_median (bool): Set how Extra-P aggregates repetitions of the same - measurement configuration. If set to True, Extra-P uses the median for - model creation, otherwise it uses the mean. (Default=True) - calc_total_metrics (bool): Set calc_total_metrics to True to let Extra-P - internally calculate the total metric values for metrics measured - per MPI rank, e.g., the average runtime/rank. (Default=False) - scaling_parameter (String): Set the scaling parameter for the total metric - calculation. This parameter is only used when calc_total_metrics=True. - One needs to provide either the name of the parameter that models the - resource allocation, e.g., the jobsize, or a fixed int value as a String, - when only scaling, e.g., the problem size, and the resource allocation - is fix. (Default="jobsize") - add_stats (bool): Option to add hypothesis function statistics to the - aggregated statistics table. (Default=True) + tht (Thicket): The thicket object to get the data + from for modeling. + parameters (list): A list of String values of the parameters + that will be considered for modeling by Extra-P. (Default=None) + metrics (list): A list of String value of the metrics + Extra-P will create models for. (Default=None) + use_median (bool): Set how Extra-P aggregates repetitions + of the same measurement configuration. If set to True, + Extra-P uses the median for model creation, otherwise + it uses the mean. (Default=True) + calc_total_metrics (bool): Set calc_total_metrics to True + to let Extra-P internally calculate the total metric + values for metrics measured per MPI rank, e.g., the + average runtime/rank. (Default=False) + scaling_parameter (String): Set the scaling parameter for the + total metric calculation. This parameter is only used when + calc_total_metrics=True. One needs to provide either the + name of the parameter that models the resource allocation, + e.g., the jobsize, or a fixed int value as a String, when + only scaling, e.g., the problem size, and the resource + allocation is fix. (Default="jobsize") + add_stats (bool): Option to add hypothesis function statistics + to the aggregated statistics table. (Default=True) """ + # create a copy of the thicket to concat them later on + tht2 = copy.deepcopy(tht) + + # set the model parameters + if not parameters: + # if there are no parameters provided use the jobsize + parameters = ["jobsize"] + else: + parameters = parameters + + # set metrics for modeling + if not metrics: + # if no metrics specified create models for all metrics + metrics = tht.exc_metrics + tht.inc_metrics + else: + metrics = metrics + + self.experiment = None + # create an extra-p experiment experiment = Experiment() # create the model parameters - for parameter in self.parameters: + for parameter in parameters: experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table ensemble_profile_ordering = list( - self.tht.dataframe.index.unique(level=1)) + tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: profile_parameter_value_mapping[profile] = [] - for parameter in self.parameters: - current_param_mapping = self.tht.metadata[parameter].to_dict() + for parameter in parameters: + current_param_mapping = tht.metadata[parameter].to_dict() for key, value in current_param_mapping.items(): profile_parameter_value_mapping[key].append(float(value)) @@ -804,12 +844,12 @@ def produce_models( # create the callpaths # NOTE: could add calltree later on, possibly from hatchet data if available - for thicket_node, _ in self.tht.dataframe.groupby(level=0): + for thicket_node, _ in tht.dataframe.groupby(level=0): if Callpath(thicket_node.frame["name"]) not in experiment.callpaths: experiment.add_callpath(Callpath(thicket_node.frame["name"])) # create the metrics - for metric in self.metrics: + for metric in metrics: experiment.add_metric(Metric(metric)) # iteratre over coordinates @@ -823,7 +863,7 @@ def produce_models( values = [] callpath_exists = False # NOTE: potentially there is a better way to access the dataframes without looping - for thicket_node, single_node_df in self.tht.dataframe.groupby( + for thicket_node, single_node_df in tht.dataframe.groupby( level=0 ): if Callpath(thicket_node.frame["name"]) == callpath: @@ -878,7 +918,7 @@ def produce_models( # check if the parameter exists if ( scaling_parameter - in self.parameters + in parameters ): parameter_id = [ i @@ -901,7 +941,7 @@ def produce_models( "The specified scaling parameter '" + str(scaling_parameter) + "' could not be found in the passed list of model parameters " - + str(self.parameters) + + str(parameters) + ".", profile, ) @@ -963,31 +1003,119 @@ def produce_models( # check the created experiment for its validty io_helper.validate_experiment(experiment) + # TODO: these need to be set by the arguments in the function call + modelers_list = list(set(k.lower() for k in + chain(single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) + print("DEBUG: modelers_list", modelers_list) + if modeler.lower() not in modelers_list: + # TODO: throw an exception (maybe custom one) saying the given modeler does not exist + # then continue with the default modeler + modeler = "default" + modeler_options = {'allow_log_terms': allow_log_terms, 'use_crossvalidation': None, 'compare_with_RSS': None, 'poly_exponents': None, + 'log_exponents': None, 'retain_default_exponents': None, 'force_combination_exponents': None, 'allow_negative_exponents': None} + # TODO: I basically want to get this dict from extra-p based on the modeler that is currently selected + # then generate options class automatically from that??? + # generate models using Extra-P model generator - model_gen = ModelGenerator( - experiment, name="Default Model", use_median=use_median + model_generator = ModelGenerator( + experiment, + modeler=modeler.lower(), + name=model_name, + use_median=use_median ) - model_gen.model_all() - experiment.add_modeler(model_gen) - # add the models, and statistics into the dataframe - for callpath in experiment.callpaths: - for metric in experiment.metrics: - mkey = (callpath, metric) - for thicket_node, _ in self.tht.dataframe.groupby(level=0): - if Callpath(thicket_node.frame["name"]) == callpath: - # catch key errors when queriying for models with a callpath, metric combination - # that does not exist because there was no measurement object created for them - try: - self.tht.statsframe.dataframe.at[ - thicket_node, str(metric) + MODEL_TAG - ] = ModelWrapper(model_gen.models[mkey], self.parameters) - # Add statistics to aggregated statistics table - if add_stats: - self._add_extrap_statistics( - thicket_node, str(metric)) - except Exception: - pass + # apply modeler options + modeler = model_generator.modeler + if isinstance(modeler, MultiParameterModeler) and modeler_options: + # set single-parameter modeler of multi-parameter modeler + single_modeler = modeler_options[SINGLE_PARAMETER_MODELER_KEY] + if single_modeler is not None: + modeler.single_parameter_modeler = single_parameter.all_modelers[single_modeler]( + ) + # apply options of single-parameter modeler + if modeler.single_parameter_modeler is not None: + for name, value in modeler_options[SINGLE_PARAMETER_OPTIONS_KEY].items(): + if value is not None: + setattr(modeler.single_parameter_modeler, name, value) + + for name, value in modeler_options.items(): + if value is not None: + setattr(modeler, name, value) + + model_generator.model_all() + + experiment.add_modeler(model_generator) + + # check if dataframe has already a multi column index + if tht.statsframe.dataframe.columns.nlevels > 1: + + # TODO: not sure if I actually need this code... + modeler_names = [] + for x in tht.statsframe.dataframe.columns.get_level_values(0).unique(): + modeler_names.append(x) + + # TODO: here I need to create the real data in a numpy array instead of generating random numbers + # TODO: second I need to put the real column names instead of the fake ones... + # TODO: do not put name column again, that one is only needed once. + tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame(np.random.rand( + 53, 3), columns=pd.MultiIndex.from_product([[model_name], ['one', 'two', 'three']]), index=tht.statsframe.dataframe.index)) + + """# add the models, and statistics into the dataframe + for callpath in experiment.callpaths: + for metric in experiment.metrics: + mkey = (callpath, metric) + for thicket_node, _ in tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) == callpath: + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + try: + tht.statsframe.dataframe.at[thicket_node, [model_name, str( + metric) + MODEL_TAG] + ] = ModelWrapper(model_generator.models[mkey], parameters, model_name) + # Add statistics to aggregated statistics table + if add_stats: + self._add_extrap_statistics( + tht, thicket_node, str(metric)) + except Exception as e: + print(e) + pass""" + + else: + # check if there is already a extra-p model in the dataframe + model_exists = True + modeler_name = None + try: + modeler_name = tht.statsframe.dataframe.at[thicket_node, + str(metric) + MODEL_TAG].name + model_exists = True + except KeyError: + model_exists = False + + # add the models, and statistics into the dataframe + # TODO: see how I can get the name column out of the multi index, that column is only needed once!!! + for callpath in experiment.callpaths: + for metric in experiment.metrics: + mkey = (callpath, metric) + for thicket_node, _ in tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) == callpath: + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + try: + tht.statsframe.dataframe.at[ + thicket_node, str(metric) + MODEL_TAG + ] = ModelWrapper(model_generator.models[mkey], parameters, model_name) + # Add statistics to aggregated statistics table + if add_stats: + self._add_extrap_statistics( + tht, thicket_node, str(metric)) + except Exception as e: + print(e) + pass + + # if there is already a model in the dataframe, concat them and add a multi column index + if model_exists is True: + tht.statsframe.dataframe = pd.concat( + [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) self.experiment = experiment @@ -1047,17 +1175,17 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: if columns is None: columns = [ col - for col in self.tht.statsframe.dataframe - if isinstance(self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper) + for col in thicket.statsframe.dataframe + if isinstance(thicket.statsframe.dataframe[col].iloc[0], ModelWrapper) ] # Error checking for c in columns: - if c not in self.tht.statsframe.dataframe.columns: + if c not in thicket.statsframe.dataframe.columns: raise ValueError( "column " + c + " is not in the aggregated statistics table." ) - elif not isinstance(self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper): + elif not isinstance(thicket.statsframe.dataframe[c].iloc[0], ModelWrapper): raise TypeError( "column " + c @@ -1069,13 +1197,14 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: for col in columns: # Get list of components for this column components = [ - Modeling._componentize_function(model_obj, self.parameters) - for model_obj in self.tht.statsframe.dataframe[col] + ExtrapInterface._componentize_function( + model_obj, parameters) + for model_obj in thicket.statsframe.dataframe[col] ] # Component dataframe comp_df = pd.DataFrame( - data=components, index=self.tht.statsframe.dataframe.index + data=components, index=thicket.statsframe.dataframe.index ) # Add column name as index level @@ -1085,8 +1214,8 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: all_dfs.append(comp_df) # Concatenate dataframes horizontally - all_dfs.insert(0, self.tht.statsframe.dataframe) - self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) def _analyze_complexity( model_object: Model, eval_target: list[float], col: str, parameters: list[str] @@ -1214,7 +1343,7 @@ def complexity_statsframe( elif len(eval_targets) > 0: # for each evaluation target check if the number of values matches the number of parameters for target in eval_targets: - if len(target) != len(self.parameters): + if len(target) != len(parameters): print( "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." ) @@ -1235,22 +1364,22 @@ def complexity_statsframe( if columns is None: columns = [ col - for col in self.tht.statsframe.dataframe + for col in thicket.statsframe.dataframe if isinstance( - self.tht.statsframe.dataframe[col].iloc[0], ModelWrapper + thicket.statsframe.dataframe[col].iloc[0], ModelWrapper ) ] # Error checking for c in columns: - if c not in self.tht.statsframe.dataframe.columns: + if c not in thicket.statsframe.dataframe.columns: raise ValueError( "column " + c + " is not in the aggregated statistics table." ) elif not isinstance( - self.tht.statsframe.dataframe[c].iloc[0], ModelWrapper + thicket.statsframe.dataframe[c].iloc[0], ModelWrapper ): raise TypeError( "column " @@ -1264,15 +1393,15 @@ def complexity_statsframe( for col in columns: # Get list of components for this column components = [ - Modeling._analyze_complexity( - model_obj, target, col, self.parameters + ExtrapInterface._analyze_complexity( + model_obj, target, col, parameters ) - for model_obj in self.tht.statsframe.dataframe[col] + for model_obj in thicket.statsframe.dataframe[col] ] # Component dataframe comp_df = pd.DataFrame( - data=components, index=self.tht.statsframe.dataframe.index + data=components, index=thicket.statsframe.dataframe.index ) # Add column name as index level @@ -1280,15 +1409,15 @@ def complexity_statsframe( all_dfs.append(comp_df) # Concatenate dataframes horizontally - all_dfs.insert(0, self.tht.statsframe.dataframe) - self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) # Add callpath ranking to the dataframe all_dfs = [] for col in columns: total_metric_value = 0 metric_values = [] - for model_obj in self.tht.statsframe.dataframe[col]: + for model_obj in thicket.statsframe.dataframe[col]: if not isinstance(model_obj, float): metric_value = model_obj.mdl.hypothesis.function.evaluate( target @@ -1319,14 +1448,14 @@ def complexity_statsframe( # Component dataframe comp_df = pd.DataFrame( - data=ranking_list, index=self.tht.statsframe.dataframe.index + data=ranking_list, index=thicket.statsframe.dataframe.index ) all_dfs.append(comp_df) # Concatenate dataframes horizontally - all_dfs.insert(0, self.tht.statsframe.dataframe) - self.tht.statsframe.dataframe = pd.concat(all_dfs, axis=1) + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) # otherwise raise an Exception else: @@ -1338,20 +1467,20 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. """ - callpaths = self.tht.statsframe.dataframe["name"].values.tolist() + callpaths = thicket.statsframe.dataframe["name"].values.tolist() # aggregate measurements inside the extra-p models from all communication functions agg_measurements_list = [] parameters = None - for metric in self.metrics: + for metric in metrics: agg_measurements = {} for i in range(len(callpaths)): if parameters is None: - parameters = self.tht.statsframe.dataframe[ + parameters = thicket.statsframe.dataframe[ str(metric)+"_extrap-model"].iloc[i].parameters - if not isinstance(self.tht.statsframe.dataframe[ + if not isinstance(thicket.statsframe.dataframe[ str(metric)+"_extrap-model"].iloc[i], float): - measurement_list = self.tht.statsframe.dataframe[ + measurement_list = thicket.statsframe.dataframe[ str(metric)+"_extrap-model"].iloc[i].mdl.measurements for i in range(len(measurement_list)): measurement_list[i].coordinate @@ -1375,7 +1504,7 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D # create a new Extra-P experiment, one for each phase model experiment = Experiment() - for metric in self.metrics: + for metric in metrics: metric = Metric(str(metric)) experiment.add_metric(metric) @@ -1386,7 +1515,7 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D experiment.add_parameter( Parameter(str(DEFAULT_PARAM_NAMES[i]))) - for metric in self.metrics: + for metric in metrics: for key, value in agg_measurements.items(): if key not in experiment.coordinates: experiment.add_coordinate(key) @@ -1403,7 +1532,7 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D # create empty pandas dataframe with columns only aggregated_df = pd.DataFrame(columns=["name"]) - for metric in self.metrics: + for metric in metrics: if add_stats is True: aggregated_df.insert(len(aggregated_df.columns), str(metric)+"_extrap-model", None) @@ -1424,7 +1553,7 @@ def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> D str(metric)+"_RSS_extrap-model", None) new_row = ["aggregated_nodes"] - for metric in self.metrics: + for metric in metrics: model = model_gen.models[(aggregated_callpath, metric)] RSS = model.hypothesis._RSS rRSS = model.hypothesis._rRSS From de5db033690625d98eaa4d9973b3a3d73840800f Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 20 Oct 2023 16:51:14 -0700 Subject: [PATCH 44/57] basic multi index for extrap modeling in thicket working --- thicket/model_extrap.py | 48 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 9441a8c5..f6c3541c 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1054,11 +1054,49 @@ def create_models(self, for x in tht.statsframe.dataframe.columns.get_level_values(0).unique(): modeler_names.append(x) - # TODO: here I need to create the real data in a numpy array instead of generating random numbers - # TODO: second I need to put the real column names instead of the fake ones... - # TODO: do not put name column again, that one is only needed once. - tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame(np.random.rand( - 53, 3), columns=pd.MultiIndex.from_product([[model_name], ['one', 'two', 'three']]), index=tht.statsframe.dataframe.index)) + # create a list with the column names + column_names = [] + column_names.append(str(metric) + MODEL_TAG) + if add_stats: + column_names.append(str(metric) + "_RSS" + MODEL_TAG) + column_names.append(str(metric) + "_rRSS" + MODEL_TAG) + column_names.append(str(metric) + "_SMAPE" + MODEL_TAG) + column_names.append(str(metric) + "_AR2" + MODEL_TAG) + column_names.append(str(metric) + "_RE" + MODEL_TAG) + + # create a numpy array containing the data + for callpath in experiment.callpaths: + for metric in experiment.metrics: + mkey = (callpath, metric) + rows = [] + for thicket_node, _ in tht.dataframe.groupby(level=0): + if Callpath(thicket_node.frame["name"]) == callpath: + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + try: + values = [] + model_wrapper = ModelWrapper( + model_generator.models[mkey], parameters, model_name) + values.append(model_wrapper) + if add_stats: + values.append( + model_wrapper.mdl.hypothesis.RSS) + values.append( + model_wrapper.mdl.hypothesis.rRSS) + values.append( + model_wrapper.mdl.hypothesis.SMAPE) + values.append( + model_wrapper.mdl.hypothesis.AR2) + values.append( + model_wrapper.mdl.hypothesis.RE) + except Exception: + pass + rows.append(values) + + data = np.array(rows) + + tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame( + data, columns=pd.MultiIndex.from_product([[model_name], column_names]), index=tht.statsframe.dataframe.index)) """# add the models, and statistics into the dataframe for callpath in experiment.callpaths: From b3a5b98c3375383e72d83e91499253f8bc99aa97 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 1 Nov 2023 18:03:45 -0700 Subject: [PATCH 45/57] added logic to print out available extra-p modelers and their options --- thicket/model_extrap.py | 104 ++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index f6c3541c..f2e4e17a 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -43,22 +43,27 @@ from extrap.entities.functions import Function from extrap.entities.terms import DEFAULT_PARAM_NAMES from extrap.entities.functions import ConstantFunction +from extrap.util.options_parser import _create_parser, _add_single_parameter_options MODEL_TAG = "_extrap-model" -class ModelerOptions(): - """ +class ExtrapModelerException(Exception): + """Custom exception class for raising exceptions when the given modeler does not exist in + Extra-P. + + Args: + Exception (Exception): Python base Exception object. """ - def __init__(self, allow_log_terms: bool = True, - use_crossvalidation: bool = True, - compare_with_RSS: bool = False): - """ + def __init__(self, message: str) -> None: + """Initialization function for the custom Extra-P reader exception class. + + Args: + message (str): The message the exception should pass on. """ - self.allow_log_terms = allow_log_terms - self.use_crossvalidation = use_crossvalidation - self.compare_with_RSS = compare_with_RSS + super().__init__() + self.message = message class ExtrapReaderException(Exception): @@ -645,9 +650,48 @@ class ExtrapInterface: append them to a thicket, and display the models.""" def __init__(self) -> None: - """Create a new Extra-P Interface object.""" + """ + Create a new Extra-P Interface object. + """ pass + def print_modelers(self) -> None: + """ + Prints the available modelers in a list. + """ + modelers_list = list(set(k.lower() for k in chain( + single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) + print("Available Extra-P Modeler:", modelers_list) + + def print_modeler_options(self, modeler_name: str) -> None: + """ + Prints all the modeler options available for the given modeler. + """ + modelers_list = list(set(k.lower() for k in chain( + single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) + try: + if modeler_name.lower() in modelers_list: + if modeler_name in single_parameter.all_modelers: + modeler = single_parameter.all_modelers[modeler_name] + sub_parser = _create_parser(modeler, modeler_name) + print('Single Parameter Options') + print('------------------------') + sub_parser.print_help() + if modeler_name in multi_parameter.all_modelers: + modeler = multi_parameter.all_modelers[modeler_name] + sub_parser = _create_parser( + modeler, modeler_name, "", False) + _add_single_parameter_options(sub_parser) + print() + print('Multi Parameter Options') + print('-----------------------') + sub_parser.print_help() + else: + raise ExtrapModelerException( + "The given modeler does not exist. Valid options are: "+str(modelers_list)) + except ExtrapModelerException as e: + print("Warning: "+e.message) + def to_html( self, show_mean: bool = False, @@ -750,6 +794,7 @@ def _add_extrap_statistics(self, tht: Thicket, node: node, metric: str) -> None: def create_models(self, tht: Thicket, + model_name: str, parameters: list[str] = None, metrics: list[str] = None, use_median: bool = True, @@ -757,7 +802,6 @@ def create_models(self, scaling_parameter: str = "jobsize", add_stats: bool = True, modeler: str = "default", - model_name: str = "default_model", allow_log_terms: bool = True ) -> None: """Converts the data in the given thicket into a format that @@ -1003,14 +1047,15 @@ def create_models(self, # check the created experiment for its validty io_helper.validate_experiment(experiment) - # TODO: these need to be set by the arguments in the function call + # check if the given modeler exists modelers_list = list(set(k.lower() for k in chain(single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) - print("DEBUG: modelers_list", modelers_list) if modeler.lower() not in modelers_list: - # TODO: throw an exception (maybe custom one) saying the given modeler does not exist - # then continue with the default modeler + raise ExtrapModelerException("The given modeler does not exist in Extra-P. Valid options are: "+str( + modelers_list)+". Using default modeler instead.") modeler = "default" + + # DEBUG modeler_options = {'allow_log_terms': allow_log_terms, 'use_crossvalidation': None, 'compare_with_RSS': None, 'poly_exponents': None, 'log_exponents': None, 'retain_default_exponents': None, 'force_combination_exponents': None, 'allow_negative_exponents': None} # TODO: I basically want to get this dict from extra-p based on the modeler that is currently selected @@ -1019,13 +1064,16 @@ def create_models(self, # generate models using Extra-P model generator model_generator = ModelGenerator( experiment, - modeler=modeler.lower(), + modeler=modeler, name=model_name, use_median=use_median ) # apply modeler options modeler = model_generator.modeler + + print("DEBUG:", modeler.OPTIONS) + if isinstance(modeler, MultiParameterModeler) and modeler_options: # set single-parameter modeler of multi-parameter modeler single_modeler = modeler_options[SINGLE_PARAMETER_MODELER_KEY] @@ -1039,6 +1087,7 @@ def create_models(self, setattr(modeler.single_parameter_modeler, name, value) for name, value in modeler_options.items(): + print(name, value) if value is not None: setattr(modeler, name, value) @@ -1098,26 +1147,6 @@ def create_models(self, tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame( data, columns=pd.MultiIndex.from_product([[model_name], column_names]), index=tht.statsframe.dataframe.index)) - """# add the models, and statistics into the dataframe - for callpath in experiment.callpaths: - for metric in experiment.metrics: - mkey = (callpath, metric) - for thicket_node, _ in tht.dataframe.groupby(level=0): - if Callpath(thicket_node.frame["name"]) == callpath: - # catch key errors when queriying for models with a callpath, metric combination - # that does not exist because there was no measurement object created for them - try: - tht.statsframe.dataframe.at[thicket_node, [model_name, str( - metric) + MODEL_TAG] - ] = ModelWrapper(model_generator.models[mkey], parameters, model_name) - # Add statistics to aggregated statistics table - if add_stats: - self._add_extrap_statistics( - tht, thicket_node, str(metric)) - except Exception as e: - print(e) - pass""" - else: # check if there is already a extra-p model in the dataframe model_exists = True @@ -1152,6 +1181,9 @@ def create_models(self, # if there is already a model in the dataframe, concat them and add a multi column index if model_exists is True: + tht.statsframe.dataframe = tht.statsframe.dataframe.drop( + columns="name") + print("DEBUG tht:", tht.statsframe.dataframe) tht.statsframe.dataframe = pd.concat( [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) From 4d2d5a4c4b6904aca02805ab4963666f66a32af9 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 2 Nov 2023 15:40:36 -0700 Subject: [PATCH 46/57] added full support for extra-p modeler configuration options with error checking --- thicket/model_extrap.py | 121 +++++++++++++++++++++++++++------------- 1 file changed, 81 insertions(+), 40 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index f2e4e17a..131e34e0 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -43,7 +43,8 @@ from extrap.entities.functions import Function from extrap.entities.terms import DEFAULT_PARAM_NAMES from extrap.entities.functions import ConstantFunction -from extrap.util.options_parser import _create_parser, _add_single_parameter_options +from extrap.util.options_parser import _create_parser, _add_single_parameter_options, _modeler_option_bool +from extrap.modelers.modeler_options import ModelerOptionsGroup MODEL_TAG = "_extrap-model" @@ -653,44 +654,66 @@ def __init__(self) -> None: """ Create a new Extra-P Interface object. """ - pass + self.modelers_list = list(set(k.lower() for k in chain( + single_parameter.all_modelers.keys(), + multi_parameter.all_modelers.keys()))) def print_modelers(self) -> None: """ Prints the available modelers in a list. """ - modelers_list = list(set(k.lower() for k in chain( - single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) - print("Available Extra-P Modeler:", modelers_list) + print("Available Extra-P Modeler:", self.modelers_list) def print_modeler_options(self, modeler_name: str) -> None: """ Prints all the modeler options available for the given modeler. """ - modelers_list = list(set(k.lower() for k in chain( - single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) + text = "Modeler Options\n" + text += "--------------\n" + modeler = self._check_modeler_name(modeler_name) + if modeler is not None: + if hasattr(modeler, 'OPTIONS'): + for name, option in modeler.OPTIONS.items(): + if isinstance(option, ModelerOptionsGroup): + for o in option.options: + metavar = o.range or o.type.__name__.upper() + text += str(o.field) + "\t " + str(metavar) + \ + "\t " + \ + str(o.description) + "\n" + else: + metavar = option.range or option.type.__name__.upper() + text += str(option.field) + "\t " + str(metavar) + \ + "\t " + \ + str(option.description) + "\n" + print(text) + + def _check_modeler_name(self, modeler_name): + modeler = None try: - if modeler_name.lower() in modelers_list: + if modeler_name.lower() in self.modelers_list: if modeler_name in single_parameter.all_modelers: modeler = single_parameter.all_modelers[modeler_name] - sub_parser = _create_parser(modeler, modeler_name) - print('Single Parameter Options') - print('------------------------') - sub_parser.print_help() - if modeler_name in multi_parameter.all_modelers: + elif modeler_name in multi_parameter.all_modelers: modeler = multi_parameter.all_modelers[modeler_name] - sub_parser = _create_parser( - modeler, modeler_name, "", False) - _add_single_parameter_options(sub_parser) - print() - print('Multi Parameter Options') - print('-----------------------') - sub_parser.print_help() - else: - raise ExtrapModelerException( - "The given modeler does not exist. Valid options are: "+str(modelers_list)) + else: + raise ExtrapModelerException( + "The given modeler does not exist. Valid options are: "+str(self.modelers_list)) except ExtrapModelerException as e: - print("Warning: "+e.message) + print("WARNING: "+e.message) + return modeler + + def _check_modeler_options(self, modeler_name): + modeler = self._check_modeler_name(modeler_name) + options = {} + if modeler is not None: + if hasattr(modeler, 'OPTIONS'): + for name, option in modeler.OPTIONS.items(): + if isinstance(option, ModelerOptionsGroup): + for o in option.options: + options[str(o.field)] = None + else: + options[str(option.field)] = None + return options, modeler_name def to_html( self, @@ -802,7 +825,7 @@ def create_models(self, scaling_parameter: str = "jobsize", add_stats: bool = True, modeler: str = "default", - allow_log_terms: bool = True + modeler_options: dict = None ) -> None: """Converts the data in the given thicket into a format that can be read by Extra-P. Then the Extra-P modeler is called @@ -814,6 +837,8 @@ def create_models(self, Arguments: tht (Thicket): The thicket object to get the data from for modeling. + model_name (str): Specify the name of the modeler internally used + by Extra-P. parameters (list): A list of String values of the parameters that will be considered for modeling by Extra-P. (Default=None) metrics (list): A list of String value of the metrics @@ -835,6 +860,10 @@ def create_models(self, allocation is fix. (Default="jobsize") add_stats (bool): Option to add hypothesis function statistics to the aggregated statistics table. (Default=True) + modeler (str): Set the name of the modeler that should be used + for modeling by Extra-P. (Default="default") + modeler_options (dict): A dict containing the options that will + be set and used for modeling by the given modeler. (Default=None) """ # create a copy of the thicket to concat them later on @@ -854,7 +883,7 @@ def create_models(self, else: metrics = metrics - self.experiment = None + # self.experiment = None # create an extra-p experiment experiment = Experiment() @@ -1048,20 +1077,16 @@ def create_models(self, io_helper.validate_experiment(experiment) # check if the given modeler exists - modelers_list = list(set(k.lower() for k in - chain(single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) - if modeler.lower() not in modelers_list: + if modeler.lower() not in self.modelers_list: raise ExtrapModelerException("The given modeler does not exist in Extra-P. Valid options are: "+str( - modelers_list)+". Using default modeler instead.") + self.modelers_list)+". Using default modeler instead.") modeler = "default" - # DEBUG - modeler_options = {'allow_log_terms': allow_log_terms, 'use_crossvalidation': None, 'compare_with_RSS': None, 'poly_exponents': None, - 'log_exponents': None, 'retain_default_exponents': None, 'force_combination_exponents': None, 'allow_negative_exponents': None} - # TODO: I basically want to get this dict from extra-p based on the modeler that is currently selected - # then generate options class automatically from that??? + # special dict to check if all given options for the modeler do exist for the given modeler + modeler_options_check, base_modeler_name = self._check_modeler_options( + modeler) - # generate models using Extra-P model generator + # create a model generator object for the experiment model_generator = ModelGenerator( experiment, modeler=modeler, @@ -1071,28 +1096,44 @@ def create_models(self, # apply modeler options modeler = model_generator.modeler + if isinstance(modeler, MultiParameterModeler) and modeler_options: - print("DEBUG:", modeler.OPTIONS) + # if there are no single parameter options, modeler defined in the options go with the default values + if "#single_parameter_modeler" not in modeler_options: + modeler_options["#single_parameter_modeler"] = "default" + if "#single_parameter_options" not in modeler_options: + modeler_options["#single_parameter_options"] = {} - if isinstance(modeler, MultiParameterModeler) and modeler_options: # set single-parameter modeler of multi-parameter modeler single_modeler = modeler_options[SINGLE_PARAMETER_MODELER_KEY] if single_modeler is not None: modeler.single_parameter_modeler = single_parameter.all_modelers[single_modeler]( ) + + # special dict to check if all given options for the modeler do exist for the given modeler + single_modeler_options_check, single_modeler_name = self._check_modeler_options( + single_modeler) + # apply options of single-parameter modeler if modeler.single_parameter_modeler is not None: for name, value in modeler_options[SINGLE_PARAMETER_OPTIONS_KEY].items(): + if name not in single_modeler_options_check: + print("WARNING: The option "+str(name) + + " does not exist for the modeler: "+str(single_modeler_name)+". Extra-P will ignore this parameter.") if value is not None: setattr(modeler.single_parameter_modeler, name, value) for name, value in modeler_options.items(): - print(name, value) + if name not in modeler_options_check and name != "#single_parameter_modeler" and name != "#single_parameter_options": + print("WARNING: The option "+str(name) + + " does not exist for the modeler: "+str(base_modeler_name)+". Extra-P will ignore this parameter.") if value is not None: setattr(modeler, name, value) + # create the models model_generator.model_all() + # add the modeler generator to the experiment experiment.add_modeler(model_generator) # check if dataframe has already a multi column index @@ -1187,7 +1228,7 @@ def create_models(self, tht.statsframe.dataframe = pd.concat( [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) - self.experiment = experiment + # self.experiment = experiment def _componentize_function( model_object: Model, parameters: list[str] From bafe065f5344211ecab8ab269455bec442bb4623 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 2 Nov 2023 18:12:46 -0700 Subject: [PATCH 47/57] multi column indexing working --- thicket/model_extrap.py | 77 +++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 131e34e0..52e5d9ec 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1096,7 +1096,7 @@ def create_models(self, # apply modeler options modeler = model_generator.modeler - if isinstance(modeler, MultiParameterModeler) and modeler_options: + if isinstance(modeler, MultiParameterModeler) and modeler_options is not None: # if there are no single parameter options, modeler defined in the options go with the default values if "#single_parameter_modeler" not in modeler_options: @@ -1123,12 +1123,13 @@ def create_models(self, if value is not None: setattr(modeler.single_parameter_modeler, name, value) - for name, value in modeler_options.items(): - if name not in modeler_options_check and name != "#single_parameter_modeler" and name != "#single_parameter_options": - print("WARNING: The option "+str(name) + - " does not exist for the modeler: "+str(base_modeler_name)+". Extra-P will ignore this parameter.") - if value is not None: - setattr(modeler, name, value) + if modeler_options is not None: + for name, value in modeler_options.items(): + if name not in modeler_options_check and name != "#single_parameter_modeler" and name != "#single_parameter_options": + print("WARNING: The option "+str(name) + + " does not exist for the modeler: "+str(base_modeler_name)+". Extra-P will ignore this parameter.") + if value is not None: + setattr(modeler, name, value) # create the models model_generator.model_all() @@ -1139,11 +1140,6 @@ def create_models(self, # check if dataframe has already a multi column index if tht.statsframe.dataframe.columns.nlevels > 1: - # TODO: not sure if I actually need this code... - modeler_names = [] - for x in tht.statsframe.dataframe.columns.get_level_values(0).unique(): - modeler_names.append(x) - # create a list with the column names column_names = [] column_names.append(str(metric) + MODEL_TAG) @@ -1224,11 +1220,64 @@ def create_models(self, if model_exists is True: tht.statsframe.dataframe = tht.statsframe.dataframe.drop( columns="name") - print("DEBUG tht:", tht.statsframe.dataframe) + # print("DEBUG tht:", tht.statsframe.dataframe) + tht3 = copy.deepcopy(tht2) + for column in tht3.statsframe.dataframe.columns: + if "_extrap-model" in column: + tht3.statsframe.dataframe = tht3.statsframe.dataframe.drop( + columns=column) + tht2.statsframe.dataframe = tht2.statsframe.dataframe.drop( + columns="name") + tht.statsframe.dataframe = pd.concat( [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) + tht.statsframe.dataframe = self._concat( + [tht3.statsframe.dataframe, tht.statsframe.dataframe], axis=1) - # self.experiment = experiment + def _concat(self, dfs, axis=0, *args, **kwargs): + """ + Wrapper for `pandas.concat'; concatenate pandas objects even if they have + unequal number of levels on concatenation axis. + + Levels containing empty strings are added from below (when concatenating along + columns) or right (when concateniting along rows) to match the maximum number + found in the dataframes. + + Parameters + ---------- + dfs : Iterable + Dataframes that must be concatenated. + axis : int, optional + Axis along which concatenation must take place. The default is 0. + + Returns + ------- + pd.DataFrame + Concatenated Dataframe. + + Notes + ----- + Any arguments and kwarguments are passed onto the `pandas.concat` function. + + See also + -------- + pandas.concat + """ + def index(df): + return df.columns if axis == 1 else df.index + + def add_levels(df): + need = want - index(df).nlevels + if need > 0: + # prepend empty levels + df = pd.concat([df], keys=[('',)*need], axis=axis) + for i in range(want-need): # move empty levels to bottom + df = df.swaplevel(i, i+need, axis=axis) + return df + + want = np.max([index(df).nlevels for df in dfs]) + dfs = [add_levels(df) for df in dfs] + return pd.concat(dfs, axis=axis, *args, **kwargs) def _componentize_function( model_object: Model, parameters: list[str] From 96deacf9d6c78e9f7ca336fa33019da28c7eeb06 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 3 Nov 2023 00:52:28 -0700 Subject: [PATCH 48/57] updated visualization methods --- thicket/model_extrap.py | 117 +++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 49 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 52e5d9ec..52cb6892 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -143,13 +143,14 @@ def eval(self, val: float) -> float: """ return self.mdl.hypothesis.function.evaluate(val) - def display_one_parameter_model( + def _display_one_parameter_model( self, show_mean: bool = False, show_median: bool = False, show_min_max: bool = False, RSS: bool = False, AR2: bool = False, + SMAPE: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, ) -> Tuple[Figure, Axes]: @@ -252,7 +253,7 @@ def display_one_parameter_model( ) # otherwise try to figure out the optimal scaling curve automatically else: - if parameters[0] == "jobsize": + if self.parameters[0] == "jobsize": y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) @@ -264,32 +265,37 @@ def display_one_parameter_model( ) # plot axes and titles - ax.set_xlabel(parameters[0] + " $" + + ax.set_xlabel(self.parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0])+"$") ax.set_ylabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") - # plot rss and ar2 values - y_pos_text = max(maxes) - 0.1 * max(maxes) + # plot rss, ar2, and smape values + y_pos_text = ax.get_ylim()[1] - 0.2 * ax.get_ylim()[1] rss = "{:.3f}".format(self.mdl.hypothesis.RSS) ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) - if RSS and not AR2: - ax.text( - x_vals[0], - y_pos_text, - "RSS = " + rss, - ) - elif AR2 and not RSS: - ax.text( - x_vals[0], - y_pos_text, - "AR\u00b2 = " + ar2, - ) - elif RSS and AR2: + smape = "{:.3f}".format(self.mdl.hypothesis.SMAPE) + + stats_text = "" + + if RSS: + stats_text += "RSS = " + rss + if AR2: + if stats_text != "": + stats_text += "\nAR\u00b2 = " + ar2 + else: + stats_text += "AR\u00b2 = " + ar2 + if SMAPE: + if stats_text != "": + stats_text += "\nSMAPE = " + smape + else: + stats_text += "SMAPE = " + smape + + if RSS or AR2 or SMAPE: ax.text( x_vals[0], y_pos_text, - "RSS = " + rss + "\nAR\u00b2 = " + ar2, + stats_text ) # plot legend @@ -297,7 +303,7 @@ def display_one_parameter_model( return fig, ax - def draw_legend( + def _draw_legend( self, axis: Axes, dict_callpath_color: dict[str, list[str]], function_char_len: int ) -> None: """This method draws a legend for 3D plots. @@ -351,13 +357,14 @@ def draw_legend( axis.legend(handles=handles, loc="center right", bbox_to_anchor=(1.75+(function_char_len)*0.01, 0.5)) - def display_two_parameter_model( + def _display_two_parameter_model( self, show_mean: bool = False, show_median: bool = False, show_min_max: bool = False, RSS: bool = False, AR2: bool = False, + SMAPE: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, ) -> Tuple[Figure, Axes]: @@ -369,6 +376,7 @@ def display_two_parameter_model( show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + SMAPE (bool, optional): whether to display Extra-P model SMAPE on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. @@ -458,8 +466,8 @@ def opt_scaling_func_user(p, q): # otherwise try to figure out the optimal scaling curve automatically else: if ( - parameters[0] == "jobsize" - and parameters[1] == "problem_size" + self.parameters[0] == "jobsize" + and self.parameters[1] == "problem_size" ): z_vals_opt = opt_scaling_func_auto(x_vals, y_vals) ax.plot_surface( @@ -527,9 +535,9 @@ def opt_scaling_func_user(p, q): ax.plot(line_x, line_y, line_z, color="black") # axis labels and title - ax.set_xlabel(parameters[0] + " $" + + ax.set_xlabel(self.parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0])+"$") - ax.set_ylabel(parameters[1] + " $" + + ax.set_ylabel(self.parameters[1] + " $" + str(DEFAULT_PARAM_NAMES[1])+"$") ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -551,34 +559,37 @@ def opt_scaling_func_user(p, q): if show_opt_scaling: dict_callpath_color["optimal scaling"] = ["surface", "red"] - # plot rss and ar2 values + # plot rss, ar2, and smape values rss = "{:.3f}".format(self.mdl.hypothesis.RSS) ar2 = "{:.3f}".format(self.mdl.hypothesis.AR2) - if RSS and not AR2: - ax.text2D( - 0, - 0.75, - "RSS = " + rss, - transform=ax.transAxes, - ) - elif AR2 and not RSS: - ax.text2D( - 0, - 0.75, - "AR\u00b2 = " + ar2, - transform=ax.transAxes, - ) - elif RSS and AR2: + smape = "{:.3f}".format(self.mdl.hypothesis.SMAPE) + + stats_text = "" + + if RSS: + stats_text += "RSS = " + rss + if AR2: + if stats_text != "": + stats_text += "\nAR\u00b2 = " + ar2 + else: + stats_text += "AR\u00b2 = " + ar2 + if SMAPE: + if stats_text != "": + stats_text += "\nSMAPE = " + smape + else: + stats_text += "SMAPE = " + smape + + if RSS or AR2 or SMAPE: ax.text2D( 0, 0.75, - "RSS = " + rss + "\nAR\u00b2 = " + ar2, + stats_text, transform=ax.transAxes, ) # draw the legend - self.draw_legend(ax, dict_callpath_color, - len(scientific_function)) + self._draw_legend(ax, dict_callpath_color, + len(scientific_function)) return fig, ax @@ -589,6 +600,7 @@ def display( show_min_max: bool = False, RSS: bool = False, AR2: bool = False, + SMAPE: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, ) -> Tuple[Figure, Axes]: @@ -602,6 +614,7 @@ def display( show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + SMAPE (bool, optional): whether to display Extra-P model SMAPE on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. @@ -613,24 +626,26 @@ def display( """ # check number of model parameters - if len(parameters) == 1: - fig, ax = self.display_one_parameter_model( + if len(self.parameters) == 1: + fig, ax = self._display_one_parameter_model( show_mean, show_median, show_min_max, RSS, AR2, + SMAPE, show_opt_scaling, opt_scaling_func, ) - elif len(parameters) == 2: - fig, ax = self.display_two_parameter_model( + elif len(self.parameters) == 2: + fig, ax = self._display_two_parameter_model( show_mean, show_median, show_min_max, RSS, AR2, + SMAPE, show_opt_scaling, opt_scaling_func, ) @@ -638,7 +653,7 @@ def display( else: raise Exception( "Plotting performance models with " - + str(len(parameters)) + + str(len(self.parameters)) + " parameters is currently not supported." ) @@ -767,6 +782,10 @@ def model_to_img_html(model_obj: Model) -> str: plt.close(fig) return imgstr + # TODO: figure out how to replace this code + # TODO: by adding something in model wrapper object + # that can be accessed here... + # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them existing_metrics = [] From 30bdd47b7454aac25e1fdd76e8a30e46f11bf0e0 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 6 Nov 2023 16:39:44 -0800 Subject: [PATCH 49/57] fixed bugs in multi column indexing --- thicket/model_extrap.py | 185 +++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 108 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 52cb6892..9a70e282 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -672,6 +672,8 @@ def __init__(self) -> None: self.modelers_list = list(set(k.lower() for k in chain( single_parameter.all_modelers.keys(), multi_parameter.all_modelers.keys()))) + self.configs = [] + self.experiments = {} def print_modelers(self) -> None: """ @@ -786,13 +788,16 @@ def model_to_img_html(model_obj: Model) -> str: # TODO: by adding something in model wrapper object # that can be accessed here... + config = self.configs[0] + # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them existing_metrics = [] - for callpath in self.experiment.callpaths: - for metric in self.experiment.metrics: + experiment = self.experiments[config] + for callpath in experiment.callpaths: + for metric in experiment.metrics: try: - self.experiment.modelers[0].models[(callpath, metric)] + experiment.modelers[0].models[(callpath, metric)] if str(metric) not in existing_metrics: existing_metrics.append(str(metric)) except KeyError: @@ -888,6 +893,17 @@ def create_models(self, # create a copy of the thicket to concat them later on tht2 = copy.deepcopy(tht) + # add this configuration to the list of the interface + try: + if model_name in self.configs: + raise Exception("A configuration with the name '" + + str(model_name)+"' already exists. Choose another name!") + else: + self.configs.append(model_name) + except Exception as e: + print("ERROR:", e) + return + # set the model parameters if not parameters: # if there are no parameters provided use the jobsize @@ -902,8 +918,6 @@ def create_models(self, else: metrics = metrics - # self.experiment = None - # create an extra-p experiment experiment = Experiment() @@ -1161,61 +1175,71 @@ def create_models(self, # create a list with the column names column_names = [] - column_names.append(str(metric) + MODEL_TAG) - if add_stats: - column_names.append(str(metric) + "_RSS" + MODEL_TAG) - column_names.append(str(metric) + "_rRSS" + MODEL_TAG) - column_names.append(str(metric) + "_SMAPE" + MODEL_TAG) - column_names.append(str(metric) + "_AR2" + MODEL_TAG) - column_names.append(str(metric) + "_RE" + MODEL_TAG) - - # create a numpy array containing the data - for callpath in experiment.callpaths: + column_names.append("name") + for metric in experiment.metrics: + column_names.append(str(metric) + MODEL_TAG) + if add_stats: + column_names.append(str(metric) + "_RSS" + MODEL_TAG) + column_names.append(str(metric) + "_rRSS" + MODEL_TAG) + column_names.append(str(metric) + "_SMAPE" + MODEL_TAG) + column_names.append(str(metric) + "_AR2" + MODEL_TAG) + column_names.append(str(metric) + "_RE" + MODEL_TAG) + + # create the table with the data that will be joined together with the column name list with the existing thicket + table = [] + for thicket_node, _ in tht.dataframe.groupby(level=0): + row = [] + row.append(str(thicket_node.frame["name"])) for metric in experiment.metrics: - mkey = (callpath, metric) - rows = [] - for thicket_node, _ in tht.dataframe.groupby(level=0): - if Callpath(thicket_node.frame["name"]) == callpath: - # catch key errors when queriying for models with a callpath, metric combination - # that does not exist because there was no measurement object created for them - try: - values = [] - model_wrapper = ModelWrapper( - model_generator.models[mkey], parameters, model_name) - values.append(model_wrapper) - if add_stats: - values.append( - model_wrapper.mdl.hypothesis.RSS) - values.append( - model_wrapper.mdl.hypothesis.rRSS) - values.append( - model_wrapper.mdl.hypothesis.SMAPE) - values.append( - model_wrapper.mdl.hypothesis.AR2) - values.append( - model_wrapper.mdl.hypothesis.RE) - except Exception: - pass - rows.append(values) - - data = np.array(rows) - + mkey = (Callpath(thicket_node.frame["name"]), metric) + try: + model_wrapper = ModelWrapper( + model_generator.models[mkey], parameters, model_name) + row.append(model_wrapper) + if add_stats: + row.append( + model_wrapper.mdl.hypothesis.RSS) + row.append( + model_wrapper.mdl.hypothesis.rRSS) + row.append( + model_wrapper.mdl.hypothesis.SMAPE) + row.append( + model_wrapper.mdl.hypothesis.AR2) + row.append( + model_wrapper.mdl.hypothesis.RE) + except KeyError: + row.append(math.nan) + if add_stats: + row.append(math.nan) + row.append(math.nan) + row.append(math.nan) + row.append(math.nan) + row.append(math.nan) + table.append(row) + data = np.array(table) + + # join with existing thicket tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame( data, columns=pd.MultiIndex.from_product([[model_name], column_names]), index=tht.statsframe.dataframe.index)) else: # check if there is already a extra-p model in the dataframe - model_exists = True + model_exists = False modeler_name = None - try: - modeler_name = tht.statsframe.dataframe.at[thicket_node, - str(metric) + MODEL_TAG].name - model_exists = True - except KeyError: - model_exists = False + for metric in experiment.metrics: + try: + modeler_name = tht.statsframe.dataframe.at[thicket_node, + str(metric) + MODEL_TAG].name + model_exists = True + except KeyError: + pass # add the models, and statistics into the dataframe - # TODO: see how I can get the name column out of the multi index, that column is only needed once!!! + remove_columns = list(tht.statsframe.dataframe.columns) + remove_columns.remove("name") + for i in range(len(remove_columns)): + tht.statsframe.dataframe = tht.statsframe.dataframe.drop( + columns=remove_columns[i]) for callpath in experiment.callpaths: for metric in experiment.metrics: mkey = (callpath, metric) @@ -1237,66 +1261,11 @@ def create_models(self, # if there is already a model in the dataframe, concat them and add a multi column index if model_exists is True: - tht.statsframe.dataframe = tht.statsframe.dataframe.drop( - columns="name") - # print("DEBUG tht:", tht.statsframe.dataframe) - tht3 = copy.deepcopy(tht2) - for column in tht3.statsframe.dataframe.columns: - if "_extrap-model" in column: - tht3.statsframe.dataframe = tht3.statsframe.dataframe.drop( - columns=column) - tht2.statsframe.dataframe = tht2.statsframe.dataframe.drop( - columns="name") - + tht.statsframe.dataframe = pd.concat( [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) - tht.statsframe.dataframe = self._concat( - [tht3.statsframe.dataframe, tht.statsframe.dataframe], axis=1) - def _concat(self, dfs, axis=0, *args, **kwargs): - """ - Wrapper for `pandas.concat'; concatenate pandas objects even if they have - unequal number of levels on concatenation axis. - - Levels containing empty strings are added from below (when concatenating along - columns) or right (when concateniting along rows) to match the maximum number - found in the dataframes. - - Parameters - ---------- - dfs : Iterable - Dataframes that must be concatenated. - axis : int, optional - Axis along which concatenation must take place. The default is 0. - - Returns - ------- - pd.DataFrame - Concatenated Dataframe. - - Notes - ----- - Any arguments and kwarguments are passed onto the `pandas.concat` function. - - See also - -------- - pandas.concat - """ - def index(df): - return df.columns if axis == 1 else df.index - - def add_levels(df): - need = want - index(df).nlevels - if need > 0: - # prepend empty levels - df = pd.concat([df], keys=[('',)*need], axis=axis) - for i in range(want-need): # move empty levels to bottom - df = df.swaplevel(i, i+need, axis=axis) - return df - - want = np.max([index(df).nlevels for df in dfs]) - dfs = [add_levels(df) for df in dfs] - return pd.concat(dfs, axis=axis, *args, **kwargs) + self.experiments[model_name] = experiment def _componentize_function( model_object: Model, parameters: list[str] From 4c282904e411192b69c72290c33e26742a1c3064 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 17 Nov 2023 11:16:34 -0800 Subject: [PATCH 50/57] updated --- thicket/model_extrap.py | 53 ++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 9a70e282..8c44acba 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -734,6 +734,7 @@ def _check_modeler_options(self, modeler_name): def to_html( self, + tht: Thicket, show_mean: bool = False, show_median: bool = False, show_min_max: bool = False, @@ -788,28 +789,32 @@ def model_to_img_html(model_obj: Model) -> str: # TODO: by adding something in model wrapper object # that can be accessed here... - config = self.configs[0] + for config in self.configs: - # catch key errors when queriying for models with a callpath, metric combination - # that does not exist because there was no measurement object created for them - existing_metrics = [] - experiment = self.experiments[config] - for callpath in experiment.callpaths: - for metric in experiment.metrics: - try: - experiment.modelers[0].models[(callpath, metric)] - if str(metric) not in existing_metrics: - existing_metrics.append(str(metric)) - except KeyError: - pass + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + existing_metrics = [] + experiment = self.experiments[config] + for callpath in experiment.callpaths: + for metric in experiment.metrics: + try: + experiment.modelers[0].models[(callpath, metric)] + if str(metric) not in existing_metrics: + existing_metrics.append(str(metric)) + except KeyError: + pass + + # TODO iterate through configs... - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} - # Subset of the aggregated statistics table with only the Extra-P columns selected - return thicket.statsframe.dataframe[ - [met + MODEL_TAG for met in existing_metrics] - ].to_html(escape=False, formatters=frm_dict) + tht.statsframe.dataframe[config] = tht.statsframe.dataframe[config][ + [met + MODEL_TAG for met in existing_metrics] + ].to_html(escape=False, formatters=frm_dict) + + # Subset of the aggregated statistics table with only the Extra-P columns selected + return tht.statsframe.dataframe.to_html() def _add_extrap_statistics(self, tht: Thicket, node: node, metric: str) -> None: """Insert the Extra-P hypothesis function statistics into the aggregated @@ -1187,7 +1192,7 @@ def create_models(self, # create the table with the data that will be joined together with the column name list with the existing thicket table = [] - for thicket_node, _ in tht.dataframe.groupby(level=0): + for thicket_node, _ in tht.dataframe.groupby(level=0): row = [] row.append(str(thicket_node.frame["name"])) for metric in experiment.metrics: @@ -1217,7 +1222,7 @@ def create_models(self, row.append(math.nan) table.append(row) data = np.array(table) - + # join with existing thicket tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame( data, columns=pd.MultiIndex.from_product([[model_name], column_names]), index=tht.statsframe.dataframe.index)) @@ -1229,7 +1234,7 @@ def create_models(self, for metric in experiment.metrics: try: modeler_name = tht.statsframe.dataframe.at[thicket_node, - str(metric) + MODEL_TAG].name + str(metric) + MODEL_TAG].name model_exists = True except KeyError: pass @@ -1239,7 +1244,7 @@ def create_models(self, remove_columns.remove("name") for i in range(len(remove_columns)): tht.statsframe.dataframe = tht.statsframe.dataframe.drop( - columns=remove_columns[i]) + columns=remove_columns[i]) for callpath in experiment.callpaths: for metric in experiment.metrics: mkey = (callpath, metric) @@ -1261,7 +1266,7 @@ def create_models(self, # if there is already a model in the dataframe, concat them and add a multi column index if model_exists is True: - + tht.statsframe.dataframe = pd.concat( [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) From c7e389d1c3189fa40e4d69f37f7fd0549c8a461d Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 17 Nov 2023 11:44:50 -0800 Subject: [PATCH 51/57] added check for .cali files --- thicket/thicket.py | 50 +++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index bf2f142d..ce01ae8b 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -151,7 +151,8 @@ def thicketize_graphframe(gf, prf): # length of the hex string before being converted to an integer. hex_length = 8 - hash_arg = int(md5(prf.encode("utf-8")).hexdigest()[:hex_length], 16) + hash_arg = int(md5(prf.encode("utf-8")).hexdigest() + [:hex_length], 16) th.profile = [hash_arg] th.profile_mapping = OrderedDict({hash_arg: prf}) @@ -239,11 +240,12 @@ def reader_dispatch(func, intersection=False, *args, **kwargs): ) # if directory of files elif os.path.isdir(obj): - for file in os.listdir(obj): - f = os.path.join(obj, file) - ens_list.append( - Thicket.thicketize_graphframe(func(f, *extra_args, **kwargs), f) - ) + for root, dirs, files in os.walk(obj): + for file in files: + if file.endswith(".cali"): + f = os.path.join(root, file) + ens_list.append( + Thicket.thicketize_graphframe(func(f), f)) # if single file elif os.path.isfile(obj): return Thicket.thicketize_graphframe(func(*args, **kwargs), args[0]) @@ -442,7 +444,8 @@ def squash(self, update_inc_cols=True): multiindex = False if isinstance(self.statsframe.dataframe.columns, pd.MultiIndex): multiindex = True - stats_df = helpers._new_statsframe_df(new_dataframe, multiindex=multiindex) + stats_df = helpers._new_statsframe_df( + new_dataframe, multiindex=multiindex) sframe = GraphFrame( graph=new_graph, dataframe=stats_df, @@ -690,7 +693,8 @@ def _agg_to_set(obj): return _set # Execute aggregation - th_copy.metadata = th_copy.metadata.groupby(idx_name).agg(_agg_to_set) + th_copy.metadata = th_copy.metadata.groupby( + idx_name).agg(_agg_to_set) # Append copy to list th_copy_list.append(th_copy) @@ -716,7 +720,8 @@ def to_json(self, ensemble=True, metadata=True, stats=True): jsonified_thicket["graph"] = [formatted_graph_dict] if ensemble: - jsonified_thicket["dataframe_indices"] = list(self.dataframe.index.names) + jsonified_thicket["dataframe_indices"] = list( + self.dataframe.index.names) ef = self.dataframe.reset_index() ef["node"] = ef["node"].apply(lambda n: n._hatchet_nid) jsonified_thicket["dataframe"] = ef.replace({np.nan: None}).to_dict( @@ -724,7 +729,8 @@ def to_json(self, ensemble=True, metadata=True, stats=True): ) if metadata: jsonified_thicket["metadata"] = ( - self.metadata.reset_index().replace({np.nan: None}).to_dict("records") + self.metadata.reset_index().replace( + {np.nan: None}).to_dict("records") ) if stats: sf = self.statsframe.dataframe.copy(deep=True) @@ -788,7 +794,8 @@ def filter_metadata(self, select_function): new_thicket = self.copy() # filter metadata table - filtered_rows = new_thicket.metadata.apply(select_function, axis=1) + filtered_rows = new_thicket.metadata.apply( + select_function, axis=1) new_thicket.metadata = new_thicket.metadata[filtered_rows] # note index keys to filter performance data table @@ -810,7 +817,8 @@ def filter_metadata(self, select_function): ) else: - raise InvalidFilter("The argument passed to filter must be a callable.") + raise InvalidFilter( + "The argument passed to filter must be a callable.") return new_thicket @@ -853,7 +861,8 @@ def query(self, query_obj, squash=True, update_inc_cols=True): query_matches = query.apply(self) filtered_df = dframe_copy.loc[dframe_copy["node"].isin(query_matches)] if filtered_df.shape[0] == 0: - raise EmptyQuery("The provided query would have produced an empty Thicket.") + raise EmptyQuery( + "The provided query would have produced an empty Thicket.") filtered_df.set_index(index_names, inplace=True) filtered_th = self.deepcopy() @@ -939,7 +948,8 @@ def filter_stats(self, filter_function): new_thicket = self.copy() # filter aggregated statistics table based on greater than restriction - filtered_rows = new_thicket.statsframe.dataframe.apply(filter_function, axis=1) + filtered_rows = new_thicket.statsframe.dataframe.apply( + filter_function, axis=1) new_thicket.statsframe.dataframe = new_thicket.statsframe.dataframe[ filtered_rows ] @@ -947,7 +957,8 @@ def filter_stats(self, filter_function): # filter performance data table based on filtered nodes filtered_nodes = new_thicket.statsframe.dataframe.index.values.tolist() new_thicket.dataframe = new_thicket.dataframe[ - new_thicket.dataframe.index.get_level_values("node").isin(filtered_nodes) + new_thicket.dataframe.index.get_level_values( + "node").isin(filtered_nodes) ] # filter nodes in the graphframe based on the dataframe nodes @@ -977,7 +988,8 @@ def get_unique_metadata(self): unique_entries = self.metadata[col].unique().tolist() unique_meta[col] = unique_entries - sorted_meta = dict(sorted(unique_meta.items(), key=lambda x: x[0].lower())) + sorted_meta = dict( + sorted(unique_meta.items(), key=lambda x: x[0].lower())) # columnar joined thicket object else: sorted_meta = [] @@ -986,11 +998,13 @@ def get_unique_metadata(self): if isinstance(self.metadata[idx][col].iloc[0], list): continue else: - unique_entries = self.metadata[idx][col].unique().tolist() + unique_entries = self.metadata[idx][col].unique( + ).tolist() unique_meta[col] = unique_entries sorted_meta.append( - (idx, dict(sorted(unique_meta.items(), key=lambda x: x[0].lower()))) + (idx, dict(sorted(unique_meta.items(), + key=lambda x: x[0].lower()))) ) return sorted_meta From 72727f84a08647f4334bf4cc4ac4e7639a05c5be Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Mon, 27 Nov 2023 15:47:24 -0800 Subject: [PATCH 52/57] updated --- thicket/model_extrap.py | 304 ++++++++++++++++++++++------------------ 1 file changed, 167 insertions(+), 137 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 8c44acba..ecc44638 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -304,14 +304,13 @@ def _display_one_parameter_model( return fig, ax def _draw_legend( - self, axis: Axes, dict_callpath_color: dict[str, list[str]], function_char_len: int + self, axis: Axes, dict_callpath_color: dict[str, list[str]], ) -> None: """This method draws a legend for 3D plots. Args: axis (_type_): The matplotlib axis of a figure object. dict_callpath_color (dict): The color/marker dict for the elements displayed in the plot. - function_char_len (int): The number of chars of the model string. """ handles = list() @@ -354,8 +353,8 @@ def _draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="center right", - bbox_to_anchor=(1.75+(function_char_len)*0.01, 0.5)) + axis.legend(handles=handles, loc="lower center", + bbox_to_anchor=(1.75, 0.5)) def _display_two_parameter_model( self, @@ -588,8 +587,9 @@ def opt_scaling_func_user(p, q): ) # draw the legend - self._draw_legend(ax, dict_callpath_color, - len(scientific_function)) + self._draw_legend(ax, dict_callpath_color) + + # plt.tight_layout() return fig, ax @@ -740,6 +740,7 @@ def to_html( show_min_max: bool = False, RSS: bool = False, AR2: bool = False, + SMAPE: bool = False, show_opt_scaling: bool = False, opt_scaling_func: str = None, ) -> DataFrame: @@ -751,6 +752,7 @@ def to_html( show_min_max (bool, optional): whether to display min/max values on the plot. Defaults to False. RSS (bool, optional): whether to display Extra-P model RSS on the plot. Defaults to False. AR2 (bool, optional): whether to display Extra-P model AR2 on the plot. Defaults to False. + SMAPE (bool, optional): whether to display Extra-P model SMAPE on the plot. Defaults to False. show_opt_scaling (bool, optional): whether to display the optimal scaling curve. Defaults to False. opt_scaling_func (str, optional): an optimal scaling function as a python interpretable string provided by the user. Defaults to None. @@ -773,6 +775,7 @@ def model_to_img_html(model_obj: Model) -> str: show_min_max, RSS, AR2, + SMAPE, show_opt_scaling, opt_scaling_func, ) @@ -785,16 +788,43 @@ def model_to_img_html(model_obj: Model) -> str: plt.close(fig) return imgstr - # TODO: figure out how to replace this code - # TODO: by adding something in model wrapper object - # that can be accessed here... + # if the dataframe has a multi-column index + # TODO: FIX THIS CODE!!! + if tht.statsframe.dataframe.columns.nlevels > 1: + for config in self.configs: + + # catch key errors when queriying for models with a callpath, metric combination + # that does not exist because there was no measurement object created for them + existing_metrics = [] + experiment = self.experiments[config] + for callpath in experiment.callpaths: + for metric in experiment.metrics: + try: + experiment.modelers[0].models[(callpath, metric)] + if str(metric) not in existing_metrics: + existing_metrics.append(str(metric)) + except KeyError: + pass - for config in self.configs: + # TODO iterate through configs... + frm_dict = { + met + MODEL_TAG: model_to_img_html for met in existing_metrics} + + tht.statsframe.dataframe[config] = tht.statsframe.dataframe[config][ + [met + MODEL_TAG for met in existing_metrics] + ].to_html(escape=False, formatters=frm_dict) + + # Subset of the aggregated statistics table with only the Extra-P columns selected + + return tht.statsframe.dataframe.to_html() + + # if the dataframe does not have a multi-column index + else: # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them existing_metrics = [] - experiment = self.experiments[config] + experiment = self.experiments[self.configs[0]] for callpath in experiment.callpaths: for metric in experiment.metrics: try: @@ -804,18 +834,14 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - # TODO iterate through configs... - frm_dict = { met + MODEL_TAG: model_to_img_html for met in existing_metrics} - tht.statsframe.dataframe[config] = tht.statsframe.dataframe[config][ + # Subset of the aggregated statistics table with only the Extra-P columns selected + return tht.statsframe.dataframe[ [met + MODEL_TAG for met in existing_metrics] ].to_html(escape=False, formatters=frm_dict) - # Subset of the aggregated statistics table with only the Extra-P columns selected - return tht.statsframe.dataframe.to_html() - def _add_extrap_statistics(self, tht: Thicket, node: node, metric: str) -> None: """Insert the Extra-P hypothesis function statistics into the aggregated statistics table. Has to be called after "create_models". @@ -1475,7 +1501,7 @@ def _analyze_complexity( from typing import List def complexity_statsframe( - self, columns: list[str] = None, eval_targets: list[list[float]] = None + self, thicket: Thicket, columns: list[str] = None, eval_targets: list[list[float]] = None ) -> None: """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. @@ -1488,133 +1514,137 @@ def complexity_statsframe( ValueError: Raises a ValueError is not in the aggregates statistics table. TypeError: Raises a TypeError if the column is not of the right type. """ - targets = [] - if eval_targets is None: - raise Exception( - "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." - ) - elif len(eval_targets) > 0: - # for each evaluation target check if the number of values matches the number of parameters - for target in eval_targets: - if len(target) != len(parameters): - print( - "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." - ) - else: - targets.append(target) - - # if there are targets to evaluate for - if len(targets) > 0: - for target in targets: - target_str = "(" - for param_value in target: - target_str += str(param_value) - target_str += "," - target_str = target_str[:-1] - target_str += ")" - - # Use all Extra-P columns - if columns is None: - columns = [ - col - for col in thicket.statsframe.dataframe - if isinstance( - thicket.statsframe.dataframe[col].iloc[0], ModelWrapper - ) - ] - - # Error checking - for c in columns: - if c not in thicket.statsframe.dataframe.columns: - raise ValueError( - "column " - + c - + " is not in the aggregated statistics table." - ) - elif not isinstance( - thicket.statsframe.dataframe[c].iloc[0], ModelWrapper - ): - raise TypeError( - "column " - + c - + " is not the right type (thicket.model_extrap.ModelWrapper)." + if len(self.configs) == 1: + exp = self.experiments[self.configs[0]] + targets = [] + if eval_targets is None: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) + elif len(eval_targets) > 0: + # for each evaluation target check if the number of values matches the number of parameters + for target in eval_targets: + if len(target) != len(exp.parameters): + print( + "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." ) + else: + targets.append(target) + + # if there are targets to evaluate for + if len(targets) > 0: + for target in targets: + target_str = "(" + for param_value in target: + target_str += str(param_value) + target_str += "," + target_str = target_str[:-1] + target_str += ")" + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in thicket.statsframe.dataframe + if isinstance( + thicket.statsframe.dataframe[col].iloc[0], ModelWrapper + ) + ] + + # Error checking + for c in columns: + if c not in thicket.statsframe.dataframe.columns: + raise ValueError( + "column " + + c + + " is not in the aggregated statistics table." + ) + elif not isinstance( + thicket.statsframe.dataframe[c].iloc[0], ModelWrapper + ): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) - # Process each column - all_dfs = [] - all_dfs_columns = [] - for col in columns: - # Get list of components for this column - components = [ - ExtrapInterface._analyze_complexity( - model_obj, target, col, parameters - ) - for model_obj in thicket.statsframe.dataframe[col] - ] + # Process each column + all_dfs = [] + all_dfs_columns = [] + for col in columns: + # Get list of components for this column + components = [ + ExtrapInterface._analyze_complexity( + model_obj, target, col, exp.parameters + ) + for model_obj in thicket.statsframe.dataframe[col] + ] - # Component dataframe - comp_df = pd.DataFrame( - data=components, index=thicket.statsframe.dataframe.index - ) + # Component dataframe + comp_df = pd.DataFrame( + data=components, index=thicket.statsframe.dataframe.index + ) - # Add column name as index level - all_dfs_columns.append(comp_df.columns) - all_dfs.append(comp_df) - - # Concatenate dataframes horizontally - all_dfs.insert(0, thicket.statsframe.dataframe) - thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) - - # Add callpath ranking to the dataframe - all_dfs = [] - for col in columns: - total_metric_value = 0 - metric_values = [] - for model_obj in thicket.statsframe.dataframe[col]: - if not isinstance(model_obj, float): - metric_value = model_obj.mdl.hypothesis.function.evaluate( - target - ) - else: - metric_value = math.nan - total_metric_value += metric_value - metric_values.append(metric_value) - percentages = [] - for value in metric_values: - percentage = value / (total_metric_value / 100) - if percentage < 0: - percentages.append(0) - else: - percentages.append(percentage) - reverse_ranking = len(percentages) - rankdata( - percentages, method="ordinal" - ).astype(int) - for i in range(len(reverse_ranking)): - reverse_ranking[i] += 1 - ranking_list = [] - for i in range(len(reverse_ranking)): - ranking_dict = {} - ranking_dict[ - col + "_growth_rank_" + target_str - ] = reverse_ranking[i] - ranking_list.append(ranking_dict) - - # Component dataframe - comp_df = pd.DataFrame( - data=ranking_list, index=thicket.statsframe.dataframe.index - ) + # Add column name as index level + all_dfs_columns.append(comp_df.columns) + all_dfs.append(comp_df) + + # Concatenate dataframes horizontally + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) + + # Add callpath ranking to the dataframe + all_dfs = [] + for col in columns: + total_metric_value = 0 + metric_values = [] + for model_obj in thicket.statsframe.dataframe[col]: + if not isinstance(model_obj, float): + metric_value = model_obj.mdl.hypothesis.function.evaluate( + target + ) + else: + metric_value = math.nan + total_metric_value += metric_value + metric_values.append(metric_value) + percentages = [] + for value in metric_values: + percentage = value / (total_metric_value / 100) + if percentage < 0: + percentages.append(0) + else: + percentages.append(percentage) + reverse_ranking = len(percentages) - rankdata( + percentages, method="ordinal" + ).astype(int) + for i in range(len(reverse_ranking)): + reverse_ranking[i] += 1 + ranking_list = [] + for i in range(len(reverse_ranking)): + ranking_dict = {} + ranking_dict[ + col + "_growth_rank_" + target_str + ] = reverse_ranking[i] + ranking_list.append(ranking_dict) + + # Component dataframe + comp_df = pd.DataFrame( + data=ranking_list, index=thicket.statsframe.dataframe.index + ) - all_dfs.append(comp_df) + all_dfs.append(comp_df) - # Concatenate dataframes horizontally - all_dfs.insert(0, thicket.statsframe.dataframe) - thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) + # Concatenate dataframes horizontally + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) - # otherwise raise an Exception + # otherwise raise an Exception + else: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) else: - raise Exception( - "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." - ) + pass def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. From 0d3ffca87d2ba1a550d541ce2dffb2042347a3a0 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Wed, 29 Nov 2023 15:03:12 -0800 Subject: [PATCH 53/57] updated complexity analysis support for multi index and modeler configurations in dataframe --- thicket/model_extrap.py | 118 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index ecc44638..34c46e9a 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1514,6 +1514,7 @@ def complexity_statsframe( ValueError: Raises a ValueError is not in the aggregates statistics table. TypeError: Raises a TypeError if the column is not of the right type. """ + if len(self.configs) == 1: exp = self.experiments[self.configs[0]] targets = [] @@ -1644,7 +1645,122 @@ def complexity_statsframe( "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." ) else: - pass + for config in self.configs: + exp = self.experiments[config] + targets = [] + if eval_targets is None: + raise Exception( + "To analyze model complexity you have to provide a target scale, a set of parameter values (one for each parameter) for which the model will be evaluated for." + ) + elif len(eval_targets) > 0: + # for each evaluation target check if the number of values matches the number of parameters + for target in eval_targets: + if len(target) != len(exp.parameters): + print( + "The number of given parameter values for the evaluation target need to be the same as the number of model parameters." + ) + else: + targets.append(target) + + if len(targets) > 0: + for target in targets: + target_str = "(" + for param_value in target: + target_str += str(param_value) + target_str += "," + target_str = target_str[:-1] + target_str += ")" + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in thicket.statsframe.dataframe[config] + if isinstance( + thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper + ) + ] + + # Error checking + for c in columns: + if c not in thicket.statsframe.dataframe[config].columns: + raise ValueError( + "column " + + c + + " is not in the aggregated statistics table." + ) + elif not isinstance( + thicket.statsframe.dataframe[config][c].iloc[0], ModelWrapper + ): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) + + # Process each column + for col in columns: + # Get list of components for this column + components = [ + ExtrapInterface._analyze_complexity( + model_obj, target, col, exp.parameters + ) + for model_obj in thicket.statsframe.dataframe[config][col] + ] + + x = [] + for key, value in components[0].items(): + x.append([]) + counter = 0 + for key, value in components[0].items(): + for comp in components: + x[counter].append(comp[key]) + counter += 1 + + counter = 0 + for key, value in components[0].items(): + thicket.statsframe.dataframe[config, + key] = x[counter] + counter += 1 + + # Add callpath ranking to the dataframe + for col in columns: + total_metric_value = 0 + metric_values = [] + for model_obj in thicket.statsframe.dataframe[config][col]: + if not isinstance(model_obj, float): + metric_value = model_obj.mdl.hypothesis.function.evaluate( + target + ) + else: + metric_value = math.nan + total_metric_value += metric_value + metric_values.append(metric_value) + percentages = [] + for value in metric_values: + percentage = value / (total_metric_value / 100) + if percentage < 0: + percentages.append(0) + else: + percentages.append(percentage) + reverse_ranking = len(percentages) - rankdata( + percentages, method="ordinal" + ).astype(int) + for i in range(len(reverse_ranking)): + reverse_ranking[i] += 1 + """ranking_list = [] + for i in range(len(reverse_ranking)): + ranking_dict = {} + ranking_dict[ + col + "_growth_rank_" + target_str + ] = reverse_ranking[i] + ranking_list.append(ranking_dict)""" + + thicket.statsframe.dataframe[config, + str(col + "_growth_rank_" + target_str)] = reverse_ranking + + thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( + axis=1) def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. From 6e2be1099e64a2e22bf1b6b6eb8728d09d014d0f Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Thu, 30 Nov 2023 17:18:34 -0800 Subject: [PATCH 54/57] componentize and aggregation function now support new interface --- thicket/model_extrap.py | 305 +++++++++++++++++++++------------------- 1 file changed, 161 insertions(+), 144 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 34c46e9a..c8050776 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1342,7 +1342,7 @@ def _componentize_function( return term_dict - def componentize_statsframe(self, columns: list[str] = None) -> None: + def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) -> None: """Componentize multiple Extra-P modeling objects in the aggregated statistics table @@ -1350,51 +1350,60 @@ def componentize_statsframe(self, columns: list[str] = None) -> None: column (list): list of column names in the aggregated statistics table to componentize. Values must be of type 'thicket.model_extrap.ModelWrapper'. """ - # Use all Extra-P columns - if columns is None: - columns = [ - col - for col in thicket.statsframe.dataframe - if isinstance(thicket.statsframe.dataframe[col].iloc[0], ModelWrapper) - ] - - # Error checking - for c in columns: - if c not in thicket.statsframe.dataframe.columns: - raise ValueError( - "column " + c + " is not in the aggregated statistics table." + + if len(self.configs) == 1: + + config = self.configs[0] + exp = self.experiments[config] + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in thicket.statsframe.dataframe + if isinstance(thicket.statsframe.dataframe[col].iloc[0], ModelWrapper) + ] + + # Error checking + for c in columns: + if c not in thicket.statsframe.dataframe.columns: + raise ValueError( + "column " + c + " is not in the aggregated statistics table." + ) + elif not isinstance(thicket.statsframe.dataframe[c].iloc[0], ModelWrapper): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) + + # Process each column + all_dfs = [] + for col in columns: + # Get list of components for this column + components = [ + ExtrapInterface._componentize_function( + model_obj, exp.parameters) + for model_obj in thicket.statsframe.dataframe[col] + ] + + # Component dataframe + comp_df = pd.DataFrame( + data=components, index=thicket.statsframe.dataframe.index ) - elif not isinstance(thicket.statsframe.dataframe[c].iloc[0], ModelWrapper): - raise TypeError( - "column " - + c - + " is not the right type (thicket.model_extrap.ModelWrapper)." + + # Add column name as index level + comp_df.columns = pd.MultiIndex.from_product( + [[col], comp_df.columns.to_list()] ) + all_dfs.append(comp_df) - # Process each column - all_dfs = [] - for col in columns: - # Get list of components for this column - components = [ - ExtrapInterface._componentize_function( - model_obj, parameters) - for model_obj in thicket.statsframe.dataframe[col] - ] - - # Component dataframe - comp_df = pd.DataFrame( - data=components, index=thicket.statsframe.dataframe.index - ) + # Concatenate dataframes horizontally + all_dfs.insert(0, thicket.statsframe.dataframe) + thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) - # Add column name as index level - comp_df.columns = pd.MultiIndex.from_product( - [[col], comp_df.columns.to_list()] - ) - all_dfs.append(comp_df) - - # Concatenate dataframes horizontally - all_dfs.insert(0, thicket.statsframe.dataframe) - thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) + else: + pass def _analyze_complexity( model_object: Model, eval_target: list[float], col: str, parameters: list[str] @@ -1762,114 +1771,122 @@ def complexity_statsframe( thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( axis=1) - def produce_aggregated_model(self, use_median: bool = True, add_stats=True) -> DataFrame: + def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, add_stats=True) -> DataFrame: """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. """ - callpaths = thicket.statsframe.dataframe["name"].values.tolist() + if len(self.configs) == 1: - # aggregate measurements inside the extra-p models from all communication functions - agg_measurements_list = [] - parameters = None - for metric in metrics: - agg_measurements = {} - for i in range(len(callpaths)): - if parameters is None: - parameters = thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i].parameters - if not isinstance(thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i], float): - measurement_list = thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i].mdl.measurements - for i in range(len(measurement_list)): - measurement_list[i].coordinate - measurement_list[i].median - if measurement_list[i].coordinate not in agg_measurements: - if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median - else: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].mean - else: - if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median + config = self.configs[0] + exp = self.experiments[config] + + callpaths = thicket.statsframe.dataframe["name"].values.tolist() + + # aggregate measurements inside the extra-p models from all communication functions + agg_measurements_list = [] + parameters = None + for metric in exp.metrics: + agg_measurements = {} + for i in range(len(callpaths)): + if parameters is None: + parameters = thicket.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i].parameters + if not isinstance(thicket.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i], float): + measurement_list = thicket.statsframe.dataframe[ + str(metric)+"_extrap-model"].iloc[i].mdl.measurements + for i in range(len(measurement_list)): + measurement_list[i].coordinate + measurement_list[i].median + if measurement_list[i].coordinate not in agg_measurements: + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] = measurement_list[i].mean else: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].mean - agg_measurements_list.append(agg_measurements) - - # create a new Extra-P experiment, one for each phase model - experiment = Experiment() - - for metric in metrics: - metric = Metric(str(metric)) - experiment.add_metric(metric) - - aggregated_callpath = Callpath("aggregated_nodes") - experiment.add_callpath(aggregated_callpath) - - for i in range(len(next(iter(agg_measurements)))): - experiment.add_parameter( - Parameter(str(DEFAULT_PARAM_NAMES[i]))) - - for metric in metrics: - for key, value in agg_measurements.items(): - if key not in experiment.coordinates: - experiment.add_coordinate(key) - measurement = Measurement( - key, aggregated_callpath, metric, value) - experiment.add_measurement(measurement) - - # create models using the new experiment for aggregated functions - model_gen = ModelGenerator( - experiment, name="Default Model", use_median=True - ) - model_gen.model_all() - experiment.add_modeler(model_gen) - - # create empty pandas dataframe with columns only - aggregated_df = pd.DataFrame(columns=["name"]) - for metric in metrics: - if add_stats is True: - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RSS_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_rRSS_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_SMAPE_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_AR2_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RE_extrap-model", None) - else: - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RSS_extrap-model", None) + if use_median is True: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].median + else: + agg_measurements[measurement_list[i] + .coordinate] += measurement_list[i].mean + agg_measurements_list.append(agg_measurements) + + # create a new Extra-P experiment, one for each phase model + experiment = Experiment() + + for metric in exp.metrics: + metric = Metric(str(metric)) + experiment.add_metric(metric) + + aggregated_callpath = Callpath("aggregated_nodes") + experiment.add_callpath(aggregated_callpath) + + for i in range(len(next(iter(agg_measurements)))): + experiment.add_parameter( + Parameter(str(DEFAULT_PARAM_NAMES[i]))) + + for metric in exp.metrics: + for key, value in agg_measurements.items(): + if key not in experiment.coordinates: + experiment.add_coordinate(key) + measurement = Measurement( + key, aggregated_callpath, metric, value) + experiment.add_measurement(measurement) + + # create models using the new experiment for aggregated functions + model_gen = ModelGenerator( + experiment, name="Default Model", use_median=True + ) + model_gen.model_all() + experiment.add_modeler(model_gen) + + # create empty pandas dataframe with columns only + aggregated_df = pd.DataFrame(columns=["name"]) + for metric in exp.metrics: + if add_stats is True: + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RSS_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_rRSS_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_SMAPE_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_AR2_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RE_extrap-model", None) + else: + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_extrap-model", None) + aggregated_df.insert(len(aggregated_df.columns), + str(metric)+"_RSS_extrap-model", None) + + new_row = ["aggregated_nodes"] + for metric in exp.metrics: + model = model_gen.models[(aggregated_callpath, metric)] + RSS = model.hypothesis._RSS + rRSS = model.hypothesis._rRSS + SMAPE = model.hypothesis._SMAPE + AR2 = model.hypothesis._AR2 + RE = model.hypothesis._RE + mdl = ModelWrapper( + model_gen.models[(aggregated_callpath, metric)], parameters, "config1") + if add_stats is True: + new_row.append(mdl) + new_row.append(RSS) + new_row.append(rRSS) + new_row.append(SMAPE) + new_row.append(AR2) + new_row.append(RE) + aggregated_df.loc[len(aggregated_df)] = new_row + return aggregated_df - new_row = ["aggregated_nodes"] - for metric in metrics: - model = model_gen.models[(aggregated_callpath, metric)] - RSS = model.hypothesis._RSS - rRSS = model.hypothesis._rRSS - SMAPE = model.hypothesis._SMAPE - AR2 = model.hypothesis._AR2 - RE = model.hypothesis._RE - mdl = ModelWrapper( - model_gen.models[(aggregated_callpath, metric)], parameters) - if add_stats is True: - new_row.append(mdl) - new_row.append(RSS) - new_row.append(rRSS) - new_row.append(SMAPE) - new_row.append(AR2) - new_row.append(RE) - aggregated_df.loc[len(aggregated_df)] = new_row - return aggregated_df + else: + pass def multi_display_one_parameter_model(model_objects): From 8642599260e78efe17761105b5549c62f5e1f8fb Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 1 Dec 2023 11:44:55 -0800 Subject: [PATCH 55/57] modeler configuration support for componentize function --- thicket/model_extrap.py | 61 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index c8050776..7e0b35b3 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -1403,7 +1403,66 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - thicket.statsframe.dataframe = pd.concat(all_dfs, axis=1) else: - pass + for config in self.configs: + exp = self.experiments[config] + + # Use all Extra-P columns + if columns is None: + columns = [ + col + for col in thicket.statsframe.dataframe[config] + if isinstance(thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper) + ] + + # Error checking + for c in columns: + if c not in thicket.statsframe.dataframe[config].columns: + raise ValueError( + "column " + c + " is not in the aggregated statistics table." + ) + elif not isinstance(thicket.statsframe.dataframe[config][c].iloc[0], ModelWrapper): + raise TypeError( + "column " + + c + + " is not the right type (thicket.model_extrap.ModelWrapper)." + ) + + # Process each column + for col in columns: + # Get list of components for this column + components = [ + ExtrapInterface._componentize_function( + model_obj, exp.parameters) + for model_obj in thicket.statsframe.dataframe[config][col] + ] + + column_keys = [] + for comp in components: + for key, _ in comp.items(): + if key not in column_keys: + column_keys.append(key) + + x = [] + for _ in range(len(column_keys)): + x.append([]) + counter = 0 + for column_key in column_keys: + for i in range(len(components)): + comp = components[i] + if column_key in comp.keys(): + x[counter].append(comp[column_key]) + else: + x[counter].append(math.nan) + counter += 1 + + counter = 0 + for column_key in column_keys: + thicket.statsframe.dataframe[config, + column_key] = x[counter] + counter += 1 + + thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( + axis=1) def _analyze_complexity( model_object: Model, eval_target: list[float], col: str, parameters: list[str] From 6ee3ce8658451c3a8f2a0f9eda4b387bb32fad3e Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 1 Dec 2023 15:34:24 -0800 Subject: [PATCH 56/57] updated --- thicket/model_extrap.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 7e0b35b3..54680f7a 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -789,8 +789,8 @@ def model_to_img_html(model_obj: Model) -> str: return imgstr # if the dataframe has a multi-column index - # TODO: FIX THIS CODE!!! if tht.statsframe.dataframe.columns.nlevels > 1: + x = [] for config in self.configs: # catch key errors when queriying for models with a callpath, metric combination @@ -806,18 +806,17 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - # TODO iterate through configs... - frm_dict = { met + MODEL_TAG: model_to_img_html for met in existing_metrics} - tht.statsframe.dataframe[config] = tht.statsframe.dataframe[config][ + # Subset of the aggregated statistics table with only the Extra-P columns selected + x.append(tht.statsframe.dataframe[config][ [met + MODEL_TAG for met in existing_metrics] - ].to_html(escape=False, formatters=frm_dict) + ]) - # Subset of the aggregated statistics table with only the Extra-P columns selected + df = pd.concat(x) - return tht.statsframe.dataframe.to_html() + return df.to_html(escape=False, formatters=frm_dict) # if the dataframe does not have a multi-column index else: @@ -1458,7 +1457,7 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - counter = 0 for column_key in column_keys: thicket.statsframe.dataframe[config, - column_key] = x[counter] + col + "_" + column_key] = x[counter] counter += 1 thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( @@ -1741,13 +1740,10 @@ def complexity_statsframe( # Use all Extra-P columns if columns is None: - columns = [ - col - for col in thicket.statsframe.dataframe[config] - if isinstance( - thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper - ) - ] + columns = [] + for col in thicket.statsframe.dataframe[config]: + if isinstance(thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper): + columns.append(col) # Error checking for c in columns: From 7ee61f357262302a1891fd098990ee78aaea0a78 Mon Sep 17 00:00:00 2001 From: Marcus Ritter Date: Fri, 1 Dec 2023 15:38:40 -0800 Subject: [PATCH 57/57] black --- thicket/model_extrap.py | 639 +++++++++++++++++++++++----------------- 1 file changed, 374 insertions(+), 265 deletions(-) diff --git a/thicket/model_extrap.py b/thicket/model_extrap.py index 54680f7a..6adf435e 100644 --- a/thicket/model_extrap.py +++ b/thicket/model_extrap.py @@ -28,8 +28,13 @@ from extrap.fileio import io_helper from extrap.modelers.model_generator import ModelGenerator -from extrap.modelers.multi_parameter.multi_parameter_modeler import MultiParameterModeler -from extrap.util.options_parser import SINGLE_PARAMETER_MODELER_KEY, SINGLE_PARAMETER_OPTIONS_KEY +from extrap.modelers.multi_parameter.multi_parameter_modeler import ( + MultiParameterModeler, +) +from extrap.util.options_parser import ( + SINGLE_PARAMETER_MODELER_KEY, + SINGLE_PARAMETER_OPTIONS_KEY, +) from extrap.modelers import single_parameter from extrap.modelers import multi_parameter from extrap.entities.experiment import Experiment @@ -43,14 +48,18 @@ from extrap.entities.functions import Function from extrap.entities.terms import DEFAULT_PARAM_NAMES from extrap.entities.functions import ConstantFunction -from extrap.util.options_parser import _create_parser, _add_single_parameter_options, _modeler_option_bool +from extrap.util.options_parser import ( + _create_parser, + _add_single_parameter_options, + _modeler_option_bool, +) from extrap.modelers.modeler_options import ModelerOptionsGroup MODEL_TAG = "_extrap-model" class ExtrapModelerException(Exception): - """Custom exception class for raising exceptions when the given modeler does not exist in + """Custom exception class for raising exceptions when the given modeler does not exist in Extra-P. Args: @@ -119,14 +128,13 @@ def __str__(self) -> str: ) elif len(self.parameters) == 2: return self.mdl.hypothesis.function.to_latex_string( - Parameter(DEFAULT_PARAM_NAMES[0]), - Parameter(DEFAULT_PARAM_NAMES[1]) + Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]) ) elif len(self.parameters) == 3: return self.mdl.hypothesis.function.to_latex_string( Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]), - Parameter(DEFAULT_PARAM_NAMES[2]) + Parameter(DEFAULT_PARAM_NAMES[2]), ) else: return 1 @@ -173,8 +181,7 @@ def _display_one_parameter_model( """ # sort based on x values - measures_sorted = sorted( - self.mdl.measurements, key=lambda x: x.coordinate[0]) + measures_sorted = sorted(self.mdl.measurements, key=lambda x: x.coordinate[0]) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values @@ -189,7 +196,8 @@ def _display_one_parameter_model( ) scientific_function = self.mdl.hypothesis.function.to_latex_string( - Parameter(DEFAULT_PARAM_NAMES[0])) + Parameter(DEFAULT_PARAM_NAMES[0]) + ) # compute y values for plotting y_vals = [self.mdl.hypothesis.function.evaluate(x) for x in x_vals] @@ -244,8 +252,7 @@ def _display_one_parameter_model( from math import log2 # noqa: F401 y_vals_opt.append(eval(opt_scaling_func)) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") except Exception as e: print( "WARNING: optimal scaling curve could not be drawn. The function needs to be interpretable by the python eval() function and the parameters need to be the same as the ones shwon on the figures. See the following exception for more information: " @@ -257,16 +264,14 @@ def _display_one_parameter_model( y_vals_opt = [] for _ in range(len(y_vals)): y_vals_opt.append(y_vals[0]) - ax.plot(x_vals, y_vals_opt, - label="optimal scaling", color="red") + ax.plot(x_vals, y_vals_opt, label="optimal scaling", color="red") else: raise Exception( "Plotting the optimal scaling automatically is currently not supported for the chosen parameter." ) # plot axes and titles - ax.set_xlabel(self.parameters[0] + " $" + - str(DEFAULT_PARAM_NAMES[0])+"$") + ax.set_xlabel(self.parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0]) + "$") ax.set_ylabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") @@ -292,11 +297,7 @@ def _display_one_parameter_model( stats_text += "SMAPE = " + smape if RSS or AR2 or SMAPE: - ax.text( - x_vals[0], - y_pos_text, - stats_text - ) + ax.text(x_vals[0], y_pos_text, stats_text) # plot legend ax.legend(loc=1) @@ -304,7 +305,9 @@ def _display_one_parameter_model( return fig, ax def _draw_legend( - self, axis: Axes, dict_callpath_color: dict[str, list[str]], + self, + axis: Axes, + dict_callpath_color: dict[str, list[str]], ) -> None: """This method draws a legend for 3D plots. @@ -353,8 +356,7 @@ def _draw_legend( ) handles.append(mark) - axis.legend(handles=handles, loc="lower center", - bbox_to_anchor=(1.75, 0.5)) + axis.legend(handles=handles, loc="lower center", bbox_to_anchor=(1.75, 0.5)) def _display_two_parameter_model( self, @@ -388,8 +390,7 @@ def _display_two_parameter_model( # sort based on x and y values measures_sorted = sorted( - self.mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + self.mdl.measurements, key=lambda x: (x.coordinate[0], x.coordinate[1]) ) # get x, y value from measurements @@ -403,11 +404,9 @@ def _display_two_parameter_model( maxes = [ms.maximum for ms in measures_sorted] # x value plotting range. Dynamic based off what the largest/smallest values are - x_vals = np.linspace( - start=X_params[0], stop=1.5 * X_params[-1], num=100) + x_vals = np.linspace(start=X_params[0], stop=1.5 * X_params[-1], num=100) # y value plotting range. Dynamic based off what the largest/smallest values are - y_vals = np.linspace( - start=Y_params[0], stop=1.5 * Y_params[-1], num=100) + y_vals = np.linspace(start=Y_params[0], stop=1.5 * Y_params[-1], num=100) x_vals, y_vals = np.meshgrid(x_vals, y_vals) if isinstance(self.mdl.hypothesis.function, ConstantFunction) is True: @@ -415,8 +414,7 @@ def _display_two_parameter_model( for i in range(len(x_vals)): zx = [] for j in range(len(x_vals[0])): - zx.append(self.mdl.hypothesis.function.evaluate( - [x_vals, y_vals])) + zx.append(self.mdl.hypothesis.function.evaluate([x_vals, y_vals])) zy.append(zx) z_vals = np.reshape(zy, (len(x_vals), len(y_vals))).T else: @@ -516,34 +514,29 @@ def opt_scaling_func_user(p, q): X_params, Y_params, medians, c="black", marker="x", label="median" ) if show_mean: - ax.scatter(X_params, Y_params, means, - c="black", marker="+", label="mean") + ax.scatter(X_params, Y_params, means, c="black", marker="+", label="mean") if show_min_max: - ax.scatter(X_params, Y_params, mins, - c="black", marker="_", label="min") - ax.scatter(X_params, Y_params, maxes, - c="black", marker="_", label="max") + ax.scatter(X_params, Y_params, mins, c="black", marker="_", label="min") + ax.scatter(X_params, Y_params, maxes, c="black", marker="_", label="max") # Draw connecting line for min, max -> error bars line_x, line_y, line_z = [], [], [] for x, y, min_v, max_v in zip(X_params, Y_params, mins, maxes): line_x.append(x), line_x.append(x) line_y.append(y), line_y.append(y) line_z.append(min_v), line_z.append(max_v) - line_x.append(np.nan), line_y.append( - np.nan), line_z.append(np.nan) + line_x.append(np.nan), line_y.append(np.nan), line_z.append(np.nan) ax.plot(line_x, line_y, line_z, color="black") # axis labels and title - ax.set_xlabel(self.parameters[0] + " $" + - str(DEFAULT_PARAM_NAMES[0])+"$") - ax.set_ylabel(self.parameters[1] + " $" + - str(DEFAULT_PARAM_NAMES[1])+"$") + ax.set_xlabel(self.parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0]) + "$") + ax.set_ylabel(self.parameters[1] + " $" + str(DEFAULT_PARAM_NAMES[1]) + "$") ax.set_zlabel(self.mdl.metric) ax.set_title(str(self.mdl.callpath) + "()") # create scientific representation of create performance model scientific_function = self.mdl.hypothesis.function.to_latex_string( - Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1])) + Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]) + ) # create dict for legend color and markers dict_callpath_color = {} @@ -661,7 +654,7 @@ def display( class ExtrapInterface: - """A class that functions as an interface between Thicket and Extra-P + """A class that functions as an interface between Thicket and Extra-P to load the data from a thicket into Extra-P, create performance models, append them to a thicket, and display the models.""" @@ -669,9 +662,15 @@ def __init__(self) -> None: """ Create a new Extra-P Interface object. """ - self.modelers_list = list(set(k.lower() for k in chain( - single_parameter.all_modelers.keys(), - multi_parameter.all_modelers.keys()))) + self.modelers_list = list( + set( + k.lower() + for k in chain( + single_parameter.all_modelers.keys(), + multi_parameter.all_modelers.keys(), + ) + ) + ) self.configs = [] self.experiments = {} @@ -689,19 +688,29 @@ def print_modeler_options(self, modeler_name: str) -> None: text += "--------------\n" modeler = self._check_modeler_name(modeler_name) if modeler is not None: - if hasattr(modeler, 'OPTIONS'): + if hasattr(modeler, "OPTIONS"): for name, option in modeler.OPTIONS.items(): if isinstance(option, ModelerOptionsGroup): for o in option.options: metavar = o.range or o.type.__name__.upper() - text += str(o.field) + "\t " + str(metavar) + \ - "\t " + \ - str(o.description) + "\n" + text += ( + str(o.field) + + "\t " + + str(metavar) + + "\t " + + str(o.description) + + "\n" + ) else: metavar = option.range or option.type.__name__.upper() - text += str(option.field) + "\t " + str(metavar) + \ - "\t " + \ - str(option.description) + "\n" + text += ( + str(option.field) + + "\t " + + str(metavar) + + "\t " + + str(option.description) + + "\n" + ) print(text) def _check_modeler_name(self, modeler_name): @@ -714,16 +723,18 @@ def _check_modeler_name(self, modeler_name): modeler = multi_parameter.all_modelers[modeler_name] else: raise ExtrapModelerException( - "The given modeler does not exist. Valid options are: "+str(self.modelers_list)) + "The given modeler does not exist. Valid options are: " + + str(self.modelers_list) + ) except ExtrapModelerException as e: - print("WARNING: "+e.message) + print("WARNING: " + e.message) return modeler def _check_modeler_options(self, modeler_name): modeler = self._check_modeler_name(modeler_name) options = {} if modeler is not None: - if hasattr(modeler, 'OPTIONS'): + if hasattr(modeler, "OPTIONS"): for name, option in modeler.OPTIONS.items(): if isinstance(option, ModelerOptionsGroup): for o in option.options: @@ -783,8 +794,7 @@ def model_to_img_html(model_obj: Model) -> str: fig.savefig(figfile, format="jpg", transparent=False) figfile.seek(0) figdata_jpg = base64.b64encode(figfile.getvalue()).decode() - imgstr = ''.format( - figdata_jpg) + imgstr = ''.format(figdata_jpg) plt.close(fig) return imgstr @@ -792,7 +802,6 @@ def model_to_img_html(model_obj: Model) -> str: if tht.statsframe.dataframe.columns.nlevels > 1: x = [] for config in self.configs: - # catch key errors when queriying for models with a callpath, metric combination # that does not exist because there was no measurement object created for them existing_metrics = [] @@ -807,12 +816,15 @@ def model_to_img_html(model_obj: Model) -> str: pass frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + met + MODEL_TAG: model_to_img_html for met in existing_metrics + } # Subset of the aggregated statistics table with only the Extra-P columns selected - x.append(tht.statsframe.dataframe[config][ - [met + MODEL_TAG for met in existing_metrics] - ]) + x.append( + tht.statsframe.dataframe[config][ + [met + MODEL_TAG for met in existing_metrics] + ] + ) df = pd.concat(x) @@ -833,8 +845,7 @@ def model_to_img_html(model_obj: Model) -> str: except KeyError: pass - frm_dict = { - met + MODEL_TAG: model_to_img_html for met in existing_metrics} + frm_dict = {met + MODEL_TAG: model_to_img_html for met in existing_metrics} # Subset of the aggregated statistics table with only the Extra-P columns selected return tht.statsframe.dataframe[ @@ -865,22 +876,21 @@ def _add_extrap_statistics(self, tht: Thicket, node: node, metric: str) -> None: tht.statsframe.dataframe.at[ node, metric + "_AR2" + MODEL_TAG ] = hypothesis_fn.AR2 - tht.statsframe.dataframe.at[ - node, metric + "_RE" + MODEL_TAG - ] = hypothesis_fn.RE - - def create_models(self, - tht: Thicket, - model_name: str, - parameters: list[str] = None, - metrics: list[str] = None, - use_median: bool = True, - calc_total_metrics: bool = False, - scaling_parameter: str = "jobsize", - add_stats: bool = True, - modeler: str = "default", - modeler_options: dict = None - ) -> None: + tht.statsframe.dataframe.at[node, metric + "_RE" + MODEL_TAG] = hypothesis_fn.RE + + def create_models( + self, + tht: Thicket, + model_name: str, + parameters: list[str] = None, + metrics: list[str] = None, + use_median: bool = True, + calc_total_metrics: bool = False, + scaling_parameter: str = "jobsize", + add_stats: bool = True, + modeler: str = "default", + modeler_options: dict = None, + ) -> None: """Converts the data in the given thicket into a format that can be read by Extra-P. Then the Extra-P modeler is called with the given options and creates a performance model for @@ -907,7 +917,7 @@ def create_models(self, average runtime/rank. (Default=False) scaling_parameter (String): Set the scaling parameter for the total metric calculation. This parameter is only used when - calc_total_metrics=True. One needs to provide either the + calc_total_metrics=True. One needs to provide either the name of the parameter that models the resource allocation, e.g., the jobsize, or a fixed int value as a String, when only scaling, e.g., the problem size, and the resource @@ -926,8 +936,11 @@ def create_models(self, # add this configuration to the list of the interface try: if model_name in self.configs: - raise Exception("A configuration with the name '" + - str(model_name)+"' already exists. Choose another name!") + raise Exception( + "A configuration with the name '" + + str(model_name) + + "' already exists. Choose another name!" + ) else: self.configs.append(model_name) except Exception as e: @@ -956,8 +969,7 @@ def create_models(self, experiment.add_parameter(Parameter(parameter)) # Ordering of profiles in the performance data table - ensemble_profile_ordering = list( - tht.dataframe.index.unique(level=1)) + ensemble_profile_ordering = list(tht.dataframe.index.unique(level=1)) profile_parameter_value_mapping = {} for profile in ensemble_profile_ordering: @@ -1028,8 +1040,7 @@ def create_models(self, ): coordinate_exists = True try: - value = single_prof_df[str( - metric)].tolist() + value = single_prof_df[str(metric)].tolist() except Exception: raise ExtrapReaderException( "The metric '" @@ -1047,8 +1058,7 @@ def create_models(self, # read out scaling parameter for total metric value calculation # if the resource allocation is static if scaling_parameter.isnumeric(): - ranks = int( - scaling_parameter) + ranks = int(scaling_parameter) # otherwise read number of ranks from the provided parameter else: # check if the parameter exists @@ -1081,8 +1091,7 @@ def create_models(self, + ".", profile, ) - values.append( - value[0] * ranks) + values.append(value[0] * ranks) # add values for all other metrics else: values.append(value[0]) @@ -1141,26 +1150,24 @@ def create_models(self, # check if the given modeler exists if modeler.lower() not in self.modelers_list: - raise ExtrapModelerException("The given modeler does not exist in Extra-P. Valid options are: "+str( - self.modelers_list)+". Using default modeler instead.") + raise ExtrapModelerException( + "The given modeler does not exist in Extra-P. Valid options are: " + + str(self.modelers_list) + + ". Using default modeler instead." + ) modeler = "default" # special dict to check if all given options for the modeler do exist for the given modeler - modeler_options_check, base_modeler_name = self._check_modeler_options( - modeler) + modeler_options_check, base_modeler_name = self._check_modeler_options(modeler) # create a model generator object for the experiment model_generator = ModelGenerator( - experiment, - modeler=modeler, - name=model_name, - use_median=use_median + experiment, modeler=modeler, name=model_name, use_median=use_median ) # apply modeler options modeler = model_generator.modeler if isinstance(modeler, MultiParameterModeler) and modeler_options is not None: - # if there are no single parameter options, modeler defined in the options go with the default values if "#single_parameter_modeler" not in modeler_options: modeler_options["#single_parameter_modeler"] = "default" @@ -1170,27 +1177,46 @@ def create_models(self, # set single-parameter modeler of multi-parameter modeler single_modeler = modeler_options[SINGLE_PARAMETER_MODELER_KEY] if single_modeler is not None: - modeler.single_parameter_modeler = single_parameter.all_modelers[single_modeler]( - ) + modeler.single_parameter_modeler = single_parameter.all_modelers[ + single_modeler + ]() # special dict to check if all given options for the modeler do exist for the given modeler - single_modeler_options_check, single_modeler_name = self._check_modeler_options( - single_modeler) + ( + single_modeler_options_check, + single_modeler_name, + ) = self._check_modeler_options(single_modeler) # apply options of single-parameter modeler if modeler.single_parameter_modeler is not None: - for name, value in modeler_options[SINGLE_PARAMETER_OPTIONS_KEY].items(): + for name, value in modeler_options[ + SINGLE_PARAMETER_OPTIONS_KEY + ].items(): if name not in single_modeler_options_check: - print("WARNING: The option "+str(name) + - " does not exist for the modeler: "+str(single_modeler_name)+". Extra-P will ignore this parameter.") + print( + "WARNING: The option " + + str(name) + + " does not exist for the modeler: " + + str(single_modeler_name) + + ". Extra-P will ignore this parameter." + ) if value is not None: setattr(modeler.single_parameter_modeler, name, value) if modeler_options is not None: for name, value in modeler_options.items(): - if name not in modeler_options_check and name != "#single_parameter_modeler" and name != "#single_parameter_options": - print("WARNING: The option "+str(name) + - " does not exist for the modeler: "+str(base_modeler_name)+". Extra-P will ignore this parameter.") + if ( + name not in modeler_options_check + and name != "#single_parameter_modeler" + and name != "#single_parameter_options" + ): + print( + "WARNING: The option " + + str(name) + + " does not exist for the modeler: " + + str(base_modeler_name) + + ". Extra-P will ignore this parameter." + ) if value is not None: setattr(modeler, name, value) @@ -1202,7 +1228,6 @@ def create_models(self, # check if dataframe has already a multi column index if tht.statsframe.dataframe.columns.nlevels > 1: - # create a list with the column names column_names = [] column_names.append("name") @@ -1224,19 +1249,15 @@ def create_models(self, mkey = (Callpath(thicket_node.frame["name"]), metric) try: model_wrapper = ModelWrapper( - model_generator.models[mkey], parameters, model_name) + model_generator.models[mkey], parameters, model_name + ) row.append(model_wrapper) if add_stats: - row.append( - model_wrapper.mdl.hypothesis.RSS) - row.append( - model_wrapper.mdl.hypothesis.rRSS) - row.append( - model_wrapper.mdl.hypothesis.SMAPE) - row.append( - model_wrapper.mdl.hypothesis.AR2) - row.append( - model_wrapper.mdl.hypothesis.RE) + row.append(model_wrapper.mdl.hypothesis.RSS) + row.append(model_wrapper.mdl.hypothesis.rRSS) + row.append(model_wrapper.mdl.hypothesis.SMAPE) + row.append(model_wrapper.mdl.hypothesis.AR2) + row.append(model_wrapper.mdl.hypothesis.RE) except KeyError: row.append(math.nan) if add_stats: @@ -1249,8 +1270,13 @@ def create_models(self, data = np.array(table) # join with existing thicket - tht.statsframe.dataframe = tht.statsframe.dataframe.join(pd.DataFrame( - data, columns=pd.MultiIndex.from_product([[model_name], column_names]), index=tht.statsframe.dataframe.index)) + tht.statsframe.dataframe = tht.statsframe.dataframe.join( + pd.DataFrame( + data, + columns=pd.MultiIndex.from_product([[model_name], column_names]), + index=tht.statsframe.dataframe.index, + ) + ) else: # check if there is already a extra-p model in the dataframe @@ -1258,8 +1284,9 @@ def create_models(self, modeler_name = None for metric in experiment.metrics: try: - modeler_name = tht.statsframe.dataframe.at[thicket_node, - str(metric) + MODEL_TAG].name + modeler_name = tht.statsframe.dataframe.at[ + thicket_node, str(metric) + MODEL_TAG + ].name model_exists = True except KeyError: pass @@ -1269,7 +1296,8 @@ def create_models(self, remove_columns.remove("name") for i in range(len(remove_columns)): tht.statsframe.dataframe = tht.statsframe.dataframe.drop( - columns=remove_columns[i]) + columns=remove_columns[i] + ) for callpath in experiment.callpaths: for metric in experiment.metrics: mkey = (callpath, metric) @@ -1280,20 +1308,25 @@ def create_models(self, try: tht.statsframe.dataframe.at[ thicket_node, str(metric) + MODEL_TAG - ] = ModelWrapper(model_generator.models[mkey], parameters, model_name) + ] = ModelWrapper( + model_generator.models[mkey], parameters, model_name + ) # Add statistics to aggregated statistics table if add_stats: self._add_extrap_statistics( - tht, thicket_node, str(metric)) + tht, thicket_node, str(metric) + ) except Exception as e: print(e) pass # if there is already a model in the dataframe, concat them and add a multi column index if model_exists is True: - tht.statsframe.dataframe = pd.concat( - [tht2.statsframe.dataframe, tht.statsframe.dataframe], axis=1, keys=[str(modeler_name), str(model_name)]) + [tht2.statsframe.dataframe, tht.statsframe.dataframe], + axis=1, + keys=[str(modeler_name), str(model_name)], + ) self.experiments[model_name] = experiment @@ -1320,8 +1353,9 @@ def _componentize_function( for term in fnc.compound_terms: if len(parameters) == 1: # Join variables of the same term together - variable_column = " * ".join(t.to_string() - for t in term.simple_terms) + variable_column = " * ".join( + t.to_string() for t in term.simple_terms + ) term_dict[variable_column] = term.coefficient else: @@ -1341,7 +1375,9 @@ def _componentize_function( return term_dict - def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) -> None: + def componentize_statsframe( + self, thicket: Thicket, columns: list[str] = None + ) -> None: """Componentize multiple Extra-P modeling objects in the aggregated statistics table @@ -1351,7 +1387,6 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - """ if len(self.configs) == 1: - config = self.configs[0] exp = self.experiments[config] @@ -1360,7 +1395,9 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - columns = [ col for col in thicket.statsframe.dataframe - if isinstance(thicket.statsframe.dataframe[col].iloc[0], ModelWrapper) + if isinstance( + thicket.statsframe.dataframe[col].iloc[0], ModelWrapper + ) ] # Error checking @@ -1369,7 +1406,9 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - raise ValueError( "column " + c + " is not in the aggregated statistics table." ) - elif not isinstance(thicket.statsframe.dataframe[c].iloc[0], ModelWrapper): + elif not isinstance( + thicket.statsframe.dataframe[c].iloc[0], ModelWrapper + ): raise TypeError( "column " + c @@ -1381,8 +1420,7 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - for col in columns: # Get list of components for this column components = [ - ExtrapInterface._componentize_function( - model_obj, exp.parameters) + ExtrapInterface._componentize_function(model_obj, exp.parameters) for model_obj in thicket.statsframe.dataframe[col] ] @@ -1410,16 +1448,23 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - columns = [ col for col in thicket.statsframe.dataframe[config] - if isinstance(thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper) + if isinstance( + thicket.statsframe.dataframe[config][col].iloc[0], + ModelWrapper, + ) ] # Error checking for c in columns: if c not in thicket.statsframe.dataframe[config].columns: raise ValueError( - "column " + c + " is not in the aggregated statistics table." + "column " + + c + + " is not in the aggregated statistics table." ) - elif not isinstance(thicket.statsframe.dataframe[config][c].iloc[0], ModelWrapper): + elif not isinstance( + thicket.statsframe.dataframe[config][c].iloc[0], ModelWrapper + ): raise TypeError( "column " + c @@ -1431,7 +1476,8 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - # Get list of components for this column components = [ ExtrapInterface._componentize_function( - model_obj, exp.parameters) + model_obj, exp.parameters + ) for model_obj in thicket.statsframe.dataframe[config][col] ] @@ -1456,12 +1502,14 @@ def componentize_statsframe(self, thicket: Thicket, columns: list[str] = None) - counter = 0 for column_key in column_keys: - thicket.statsframe.dataframe[config, - col + "_" + column_key] = x[counter] + thicket.statsframe.dataframe[ + config, col + "_" + column_key + ] = x[counter] counter += 1 thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( - axis=1) + axis=1 + ) def _analyze_complexity( model_object: Model, eval_target: list[float], col: str, parameters: list[str] @@ -1497,8 +1545,7 @@ def _analyze_complexity( if len(fnc.compound_terms) == 0: complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class + return_value[col + "_complexity_" + target_str] = complexity_class return_value[col + "_coefficient_" + target_str] = coefficient else: @@ -1526,15 +1573,19 @@ def _analyze_complexity( comp = comp.replace("^", "**") complexity_class = "" + comp + "" coefficient = terms[max_index].coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = coefficient + return_value[ + col + "_complexity_" + target_str + ] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient else: comp = "" - for parameter_term_pair in terms[max_index].parameter_term_pairs: + for parameter_term_pair in terms[ + max_index + ].parameter_term_pairs: # [0] to get the index of the paramete - term_parameter_str = DEFAULT_PARAM_NAMES[parameter_term_pair[0]] + term_parameter_str = DEFAULT_PARAM_NAMES[ + parameter_term_pair[0] + ] # [1] to get the term if comp == "": comp += parameter_term_pair[1].to_string( @@ -1550,25 +1601,28 @@ def _analyze_complexity( ) comp = comp.replace("^", "**") complexity_class = "" + comp + "" - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = term.coefficient + return_value[ + col + "_complexity_" + target_str + ] = complexity_class + return_value[ + col + "_coefficient_" + target_str + ] = term.coefficient else: complexity_class = "1" coefficient = fnc.constant_coefficient - return_value[col + "_complexity_" + - target_str] = complexity_class - return_value[col + "_coefficient_" + - target_str] = coefficient + return_value[col + "_complexity_" + target_str] = complexity_class + return_value[col + "_coefficient_" + target_str] = coefficient return return_value from typing import List def complexity_statsframe( - self, thicket: Thicket, columns: list[str] = None, eval_targets: list[list[float]] = None + self, + thicket: Thicket, + columns: list[str] = None, + eval_targets: list[list[float]] = None, ) -> None: """Analyzes the complexity of the Extra-P models for the given thicket statsframe and the list of selected columns (metrics) for a given target evaluation scale. Then adds the results back into the statsframe. @@ -1668,8 +1722,8 @@ def complexity_statsframe( metric_values = [] for model_obj in thicket.statsframe.dataframe[col]: if not isinstance(model_obj, float): - metric_value = model_obj.mdl.hypothesis.function.evaluate( - target + metric_value = ( + model_obj.mdl.hypothesis.function.evaluate(target) ) else: metric_value = math.nan @@ -1741,8 +1795,11 @@ def complexity_statsframe( # Use all Extra-P columns if columns is None: columns = [] - for col in thicket.statsframe.dataframe[config]: - if isinstance(thicket.statsframe.dataframe[config][col].iloc[0], ModelWrapper): + for col in thicket.statsframe.dataframe[config]: + if isinstance( + thicket.statsframe.dataframe[config][col].iloc[0], + ModelWrapper, + ): columns.append(col) # Error checking @@ -1754,7 +1811,8 @@ def complexity_statsframe( + " is not in the aggregated statistics table." ) elif not isinstance( - thicket.statsframe.dataframe[config][c].iloc[0], ModelWrapper + thicket.statsframe.dataframe[config][c].iloc[0], + ModelWrapper, ): raise TypeError( "column " @@ -1769,7 +1827,9 @@ def complexity_statsframe( ExtrapInterface._analyze_complexity( model_obj, target, col, exp.parameters ) - for model_obj in thicket.statsframe.dataframe[config][col] + for model_obj in thicket.statsframe.dataframe[config][ + col + ] ] x = [] @@ -1783,8 +1843,7 @@ def complexity_statsframe( counter = 0 for key, value in components[0].items(): - thicket.statsframe.dataframe[config, - key] = x[counter] + thicket.statsframe.dataframe[config, key] = x[counter] counter += 1 # Add callpath ranking to the dataframe @@ -1793,8 +1852,10 @@ def complexity_statsframe( metric_values = [] for model_obj in thicket.statsframe.dataframe[config][col]: if not isinstance(model_obj, float): - metric_value = model_obj.mdl.hypothesis.function.evaluate( - target + metric_value = ( + model_obj.mdl.hypothesis.function.evaluate( + target + ) ) else: metric_value = math.nan @@ -1820,18 +1881,20 @@ def complexity_statsframe( ] = reverse_ranking[i] ranking_list.append(ranking_dict)""" - thicket.statsframe.dataframe[config, - str(col + "_growth_rank_" + target_str)] = reverse_ranking + thicket.statsframe.dataframe[ + config, str(col + "_growth_rank_" + target_str) + ] = reverse_ranking - thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index( - axis=1) + thicket.statsframe.dataframe = ( + thicket.statsframe.dataframe.sort_index(axis=1) + ) - def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, add_stats=True) -> DataFrame: - """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases. - """ + def produce_aggregated_model( + self, thicket: Thicket, use_median: bool = True, add_stats=True + ) -> DataFrame: + """Analysis the thicket statsframe by grouping application phases such as computation and communication together to create performance models for these phases.""" if len(self.configs) == 1: - config = self.configs[0] exp = self.experiments[config] @@ -1844,29 +1907,43 @@ def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, ad agg_measurements = {} for i in range(len(callpaths)): if parameters is None: - parameters = thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i].parameters - if not isinstance(thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i], float): - measurement_list = thicket.statsframe.dataframe[ - str(metric)+"_extrap-model"].iloc[i].mdl.measurements + parameters = ( + thicket.statsframe.dataframe[str(metric) + "_extrap-model"] + .iloc[i] + .parameters + ) + if not isinstance( + thicket.statsframe.dataframe[ + str(metric) + "_extrap-model" + ].iloc[i], + float, + ): + measurement_list = ( + thicket.statsframe.dataframe[str(metric) + "_extrap-model"] + .iloc[i] + .mdl.measurements + ) for i in range(len(measurement_list)): measurement_list[i].coordinate measurement_list[i].median if measurement_list[i].coordinate not in agg_measurements: if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].median + agg_measurements[ + measurement_list[i].coordinate + ] = measurement_list[i].median else: - agg_measurements[measurement_list[i] - .coordinate] = measurement_list[i].mean + agg_measurements[ + measurement_list[i].coordinate + ] = measurement_list[i].mean else: if use_median is True: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].median + agg_measurements[ + measurement_list[i].coordinate + ] += measurement_list[i].median else: - agg_measurements[measurement_list[i] - .coordinate] += measurement_list[i].mean + agg_measurements[ + measurement_list[i].coordinate + ] += measurement_list[i].mean agg_measurements_list.append(agg_measurements) # create a new Extra-P experiment, one for each phase model @@ -1880,15 +1957,13 @@ def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, ad experiment.add_callpath(aggregated_callpath) for i in range(len(next(iter(agg_measurements)))): - experiment.add_parameter( - Parameter(str(DEFAULT_PARAM_NAMES[i]))) + experiment.add_parameter(Parameter(str(DEFAULT_PARAM_NAMES[i]))) for metric in exp.metrics: for key, value in agg_measurements.items(): if key not in experiment.coordinates: experiment.add_coordinate(key) - measurement = Measurement( - key, aggregated_callpath, metric, value) + measurement = Measurement(key, aggregated_callpath, metric, value) experiment.add_measurement(measurement) # create models using the new experiment for aggregated functions @@ -1902,23 +1977,43 @@ def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, ad aggregated_df = pd.DataFrame(columns=["name"]) for metric in exp.metrics: if add_stats is True: - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RSS_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_rRSS_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_SMAPE_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_AR2_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RE_extrap-model", None) + aggregated_df.insert( + len(aggregated_df.columns), str(metric) + "_extrap-model", None + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_RSS_extrap-model", + None, + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_rRSS_extrap-model", + None, + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_SMAPE_extrap-model", + None, + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_AR2_extrap-model", + None, + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_RE_extrap-model", + None, + ) else: - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_extrap-model", None) - aggregated_df.insert(len(aggregated_df.columns), - str(metric)+"_RSS_extrap-model", None) + aggregated_df.insert( + len(aggregated_df.columns), str(metric) + "_extrap-model", None + ) + aggregated_df.insert( + len(aggregated_df.columns), + str(metric) + "_RSS_extrap-model", + None, + ) new_row = ["aggregated_nodes"] for metric in exp.metrics: @@ -1929,7 +2024,10 @@ def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, ad AR2 = model.hypothesis._AR2 RE = model.hypothesis._RE mdl = ModelWrapper( - model_gen.models[(aggregated_callpath, metric)], parameters, "config1") + model_gen.models[(aggregated_callpath, metric)], + parameters, + "config1", + ) if add_stats is True: new_row.append(mdl) new_row.append(RSS) @@ -1945,38 +2043,37 @@ def produce_aggregated_model(self, thicket: Thicket, use_median: bool = True, ad def multi_display_one_parameter_model(model_objects): - functions = [] scientific_functions = [] for model_object in model_objects: functions.append(model_object.mdl.hypothesis.function) scientific_functions.append( - model_object.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]))) + model_object.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]) + ) + ) # sort based on x values measures_sorted = sorted( - model_objects[0].mdl.measurements, key=lambda x: x.coordinate[0]) + model_objects[0].mdl.measurements, key=lambda x: x.coordinate[0] + ) # compute means, medians, mins, maxes params = [ms.coordinate[0] for ms in measures_sorted] # X values # x value plotting range, dynamic based off what the largest/smallest values are - x_vals = np.arange( - params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0 - ) + x_vals = np.arange(params[0], 1.5 * params[-1], (params[-1] - params[0]) / 100.0) y_vals_list = [] for model_object in model_objects: # compute y values for plotting - y_vals = [model_object.mdl.hypothesis.function.evaluate( - x) for x in x_vals] + y_vals = [model_object.mdl.hypothesis.function.evaluate(x) for x in x_vals] y_vals_list.append(y_vals) plt.ioff() fig, ax = plt.subplots() - range_values = np.arange( - 0, 1, 1 / len(model_objects)) + range_values = np.arange(0, 1, 1 / len(model_objects)) if len(model_objects) <= 20: colormap = "tab20" else: @@ -1989,14 +2086,17 @@ def multi_display_one_parameter_model(model_objects): # plot the model for i in range(len(model_objects)): - ax.plot(x_vals, y_vals_list[i], - label=str(model_objects[i].mdl.callpath) + - ": "+scientific_functions[i], - color=rgbas[i]) + ax.plot( + x_vals, + y_vals_list[i], + label=str(model_objects[i].mdl.callpath) + ": " + scientific_functions[i], + color=rgbas[i], + ) # plot axes and titles - ax.set_xlabel(model_objects[0].parameters[0] + " $" + - str(DEFAULT_PARAM_NAMES[0])+"$") + ax.set_xlabel( + model_objects[0].parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0]) + "$" + ) ax.set_ylabel(model_objects[0].mdl.metric) # plot legend @@ -2006,7 +2106,6 @@ def multi_display_one_parameter_model(model_objects): def multi_display_two_parameter_model(model_objects): - parameters = model_objects[0].parameters functions = [] @@ -2014,11 +2113,13 @@ def multi_display_two_parameter_model(model_objects): for model_object in model_objects: functions.append(model_object.mdl.hypothesis.function) scientific_functions.append( - model_object.mdl.hypothesis.function.to_latex_string(Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]))) + model_object.mdl.hypothesis.function.to_latex_string( + Parameter(DEFAULT_PARAM_NAMES[0]), Parameter(DEFAULT_PARAM_NAMES[1]) + ) + ) # chose the color map to take the colors from dynamically - range_values = np.arange( - 0, 1, 1 / len(model_objects)) + range_values = np.arange(0, 1, 1 / len(model_objects)) if len(model_objects) <= 20: colormap = "tab20" else: @@ -2030,8 +2131,9 @@ def multi_display_two_parameter_model(model_objects): rgbas.append(rgba) sorted_colors = {} for rgba in rgbas: - luminance = sqrt(0.299*rgba[0]**2 + 0.587 * - rgba[1]**2 + 0.114*rgba[2]**2) + luminance = sqrt( + 0.299 * rgba[0] ** 2 + 0.587 * rgba[1] ** 2 + 0.114 * rgba[2] ** 2 + ) sorted_colors[luminance] = rgba sorted_colors_keys = list(sorted_colors.keys()) sorted_colors_keys.sort() @@ -2061,15 +2163,16 @@ def multi_display_two_parameter_model(model_objects): eval_results = {} for model_object in model_objects: function = model_object.mdl.hypothesis.function - result = function.evaluate((xmax*1.5, ymax*1.5)) + result = function.evaluate((xmax * 1.5, ymax * 1.5)) eval_results[result] = (function, model_object) # create dict for legend color and markers dict_callpath_color = {} function_char_len = 0 for i in range(len(scientific_functions)): - dict_callpath_color[str(model_objects[i].mdl.callpath)+": "+str(scientific_functions[i])] = [ - "surface", rgbas[i]] + dict_callpath_color[ + str(model_objects[i].mdl.callpath) + ": " + str(scientific_functions[i]) + ] = ["surface", rgbas[i]] if i == 0: function_char_len = len(str(scientific_functions[i])) else: @@ -2078,12 +2181,11 @@ def multi_display_two_parameter_model(model_objects): plt.ioff() fig = plt.figure() - ax = fig.add_subplot(projection='3d') + ax = fig.add_subplot(projection="3d") sorted_eval_results_keys = list(eval_results.keys()) sorted_eval_results_keys.sort() - eval_results = { - i: eval_results[i] for i in sorted_eval_results_keys} + eval_results = {i: eval_results[i] for i in sorted_eval_results_keys} model_objects = [] for _, value in eval_results.items(): @@ -2091,29 +2193,34 @@ def multi_display_two_parameter_model(model_objects): # sort based on x and y values measures_sorted = sorted( - model_objects[0].mdl.measurements, key=lambda x: ( - x.coordinate[0], x.coordinate[1]) + model_objects[0].mdl.measurements, + key=lambda x: (x.coordinate[0], x.coordinate[1]), ) X_params = [ms.coordinate[0] for ms in measures_sorted] Y_params = [ms.coordinate[1] for ms in measures_sorted] maxX = 1.5 * X_params[-1] maxY = 1.5 * Y_params[-1] - X, Y, Z_List, z_List = calculate_z_models( - maxX, maxY, model_objects, parameters) + X, Y, Z_List, z_List = calculate_z_models(maxX, maxY, model_objects, parameters) for i in range(len(Z_List)): ax.plot_surface( - X, Y, Z_List[i], + X, + Y, + Z_List[i], rstride=1, cstride=1, antialiased=False, - alpha=0.3, color=rgbas[i]) + alpha=0.3, + color=rgbas[i], + ) # axis labels and title - ax.set_xlabel(model_objects[0].parameters[0] + - " $"+str(DEFAULT_PARAM_NAMES[0])+"$") - ax.set_ylabel(model_objects[0].parameters[1] + - " $"+str(DEFAULT_PARAM_NAMES[1])+"$") + ax.set_xlabel( + model_objects[0].parameters[0] + " $" + str(DEFAULT_PARAM_NAMES[0]) + "$" + ) + ax.set_ylabel( + model_objects[0].parameters[1] + " $" + str(DEFAULT_PARAM_NAMES[1]) + "$" + ) ax.set_zlabel(model_objects[0].mdl.metric) # draw the legend @@ -2124,8 +2231,11 @@ def multi_display_two_parameter_model(model_objects): patch = mpatches.Patch(color=value[1], label=labelName) handles.append(patch) - ax.legend(handles=handles, loc="center right", - bbox_to_anchor=(2+(function_char_len)*0.01, 0.5)) + ax.legend( + handles=handles, + loc="center right", + bbox_to_anchor=(2 + (function_char_len) * 0.01, 0.5), + ) return fig, ax @@ -2158,7 +2268,7 @@ def calculate_z_models(maxX, maxY, model_list, parameters, max_z=0): # Get the z value for the x and y value z_List = list() Z_List = list() - previous = np.seterr(invalid='ignore', divide='ignore') + previous = np.seterr(invalid="ignore", divide="ignore") for model in model_list: function = model.mdl.hypothesis.function zs = calculate_z_optimized(X, Y, function, parameters, maxX, maxY) @@ -2184,7 +2294,7 @@ def calculate_grid_parameters(maxX, maxY): def getPixelGap(lowerlimit, upperlimit, numberOfPixels): """ - This function calculate the gap in pixels based on number of pixels and max value + This function calculate the gap in pixels based on number of pixels and max value """ pixelGap = (upperlimit - lowerlimit) / numberOfPixels return pixelGap @@ -2192,11 +2302,10 @@ def getPixelGap(lowerlimit, upperlimit, numberOfPixels): def calculate_z_optimized(X, Y, function, parameters, maxX, maxY): """ - This function evaluates the function passed to it. + This function evaluates the function passed to it. """ xs, ys = X.reshape(-1), Y.reshape(-1) - points = np.ndarray( - (len(parameters), len(xs))) + points = np.ndarray((len(parameters), len(xs))) points[0] = maxX points[1] = maxY