diff --git a/src/trustyai/explainers/counterfactuals.py b/src/trustyai/explainers/counterfactuals.py index 874b89d..42ad9b3 100644 --- a/src/trustyai/explainers/counterfactuals.py +++ b/src/trustyai/explainers/counterfactuals.py @@ -8,7 +8,6 @@ import uuid as _uuid from trustyai import _default_initializer # pylint: disable=unused-import -from .explanation_results import ExplanationResults from trustyai.utils._visualisation import ( DEFAULT_STYLE as ds, DEFAULT_RC_PARAMS as drcp, @@ -20,6 +19,8 @@ Model, ) +from .explanation_results import ExplanationResults + from trustyai.utils.data_conversions import ( prediction_object_to_numpy, diff --git a/src/trustyai/explainers/explanation_results.py b/src/trustyai/explainers/explanation_results.py index ed6ff0b..6107ba6 100644 --- a/src/trustyai/explainers/explanation_results.py +++ b/src/trustyai/explainers/explanation_results.py @@ -1,6 +1,6 @@ """Generic class for Explanation and Saliency results""" from abc import ABC, abstractmethod -from typing import Dict +from typing import Dict, Union import bokeh.models import pandas as pd @@ -8,8 +8,9 @@ from pandas.io.formats.style import Styler +# pylint: disable=too-few-public-methods class ExplanationResults(ABC): - """Abstract class for explanation visualisers""" + """Abstract class for non-saliency visualisers""" @abstractmethod def as_dataframe(self) -> pd.DataFrame: @@ -21,9 +22,19 @@ def as_html(self) -> Styler: # pylint: disable=too-few-public-methods -class SaliencyResults(ExplanationResults): +class SaliencyResults(ABC): """Abstract class for saliency visualisers""" + @abstractmethod + def as_dataframe( + self, output_name=None + ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]: + """Display explanation result as a dataframe""" + + @abstractmethod + def as_html(self, output_name=None) -> Union[Dict[str, Styler], Styler]: + """Visualise the styled dataframe""" + @abstractmethod def saliency_map(self): """Return the Saliencies as a dictionary, keyed by output name""" 
diff --git a/src/trustyai/explainers/lime.py b/src/trustyai/explainers/lime.py index 92d8766..3836a19 100644 --- a/src/trustyai/explainers/lime.py +++ b/src/trustyai/explainers/lime.py @@ -72,15 +72,24 @@ def saliency_map(self) -> Dict[str, Saliency]: for entry in self._java_saliency_results.saliencies.entrySet() } - def as_dataframe(self) -> pd.DataFrame: + def as_dataframe( + self, output_name: str = None + ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]: """ Return the LIME result as a dataframe. + Parameters + ---------- + output_name: str + If an output_name is passed, that output's explanation is returned as a pandas + dataframe. Otherwise, all outputs' explanation dataframes are returned in a dictionary. + + Returns ------- - pandas.DataFrame + pandas.DataFrame or Dict[str, pandas.DataFrame] Dictionary of DataFrames, keyed by output name, containing the results of the LIME - explanation. For each model output, the table will contain the following columns: + explanation. Each dataframe will contain the following columns: * ``Feature``: The name of the feature * ``Value``: The value of the feature for this particular input. @@ -88,30 +97,44 @@ def as_dataframe(self) -> pd.DataFrame: * ``Confidence``: The confidence of this explanation as returned by the explainer. 
""" + outputs = self.saliency_map().keys() data = {} for output in outputs: - output_rows = [] - for pfi in self.saliency_map().get(output).getPerFeatureImportance(): - output_rows.append( - { - "Feature": str(pfi.getFeature().getName().toString()), - "Value": pfi.getFeature().getValue().getUnderlyingObject(), - "Saliency": pfi.getScore(), - "Confidence": pfi.getConfidence(), - } - ) - data[output] = pd.DataFrame(output_rows) + if output_name is None or output == output_name: + output_rows = [] + for pfi in self.saliency_map().get(output).getPerFeatureImportance(): + output_rows.append( + { + "Feature": str(pfi.getFeature().getName().toString()), + "Value": pfi.getFeature().getValue().getUnderlyingObject(), + "Saliency": pfi.getScore(), + "Confidence": pfi.getConfidence(), + } + ) + data[output] = pd.DataFrame(output_rows) + + if output_name is not None: + return data[output_name] return data - def as_html(self) -> pd.io.formats.style.Styler: + def as_html( + self, output_name: str = None + ) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]: """ Return the LIME results as Pandas Styler objects. + Parameters + ---------- + output_name: str + If an output_name is passed, that output's explanation is returned as a pandas Styler. + Otherwise, all outputs' explanation stylers are returned in a dictionary. + + Returns ------- - Dict[str, pandas.Styler] + pandas.Styler or Dict[str, pandas.Styler] Dictionary of stylers keyed by output name. Each styler containing the results of the LIME explanation for that particular output, in the same schema as in :func:`as_dataframe`. 
This will: @@ -121,19 +144,25 @@ def as_html(self) -> pd.io.formats.style.Styler: htmls = {} for k, df in self.as_dataframe().items(): - htmls[k] = df.style.background_gradient( - LinearSegmentedColormap.from_list( - name="rwg", - colors=[ - ds["negative_primary_colour"], - ds["neutral_primary_colour"], - ds["positive_primary_colour"], - ], - ), - subset="Saliency", - vmin=-1 * max(np.abs(df["Saliency"])), - vmax=max(np.abs(df["Saliency"])), - ) + if output_name is None or k == output_name: + style = df.style.background_gradient( + LinearSegmentedColormap.from_list( + name="rwg", + colors=[ + ds["negative_primary_colour"], + ds["neutral_primary_colour"], + ds["positive_primary_colour"], + ], + ), + subset="Saliency", + vmin=-1 * max(np.abs(df["Saliency"])), + vmax=max(np.abs(df["Saliency"])), + ) + style.set_caption(f"LIME Explanation of {k}") + htmls[k] = style + + if output_name is not None: + return htmls[output_name] return htmls def _matplotlib_plot(self, output_name: str, block=True) -> None: diff --git a/src/trustyai/explainers/shap.py b/src/trustyai/explainers/shap.py index b80648b..6aae309 100644 --- a/src/trustyai/explainers/shap.py +++ b/src/trustyai/explainers/shap.py @@ -123,15 +123,24 @@ def _saliency_to_dataframe(self, saliency, output_name): return pd.DataFrame([fnull] + data_rows) - def as_dataframe(self) -> Dict[str, pd.DataFrame]: + def as_dataframe( + self, output_name: str = None + ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]: """ Return the SHAP results as dataframes. + Parameters + ---------- + output_name: str + If an output_name is passed, that output's explanation is returned as a dataframe. + Otherwise, all outputs' explanation dataframes are returned in a dictionary. + Returns ------- - Dict[str, pandas.DataFrame] - Dictionary of DataFrames, keyed by output name, containing the results of the SHAP - explanation. 
For each model output, the table will contain the following columns: + pandas.DataFrame or Dict[str, pandas.DataFrame] + A dataframe or dictionary of DataFrames, keyed by output name. Each dataframe + contains the results of the SHAP explanation for a particular output. Each dataframe + will contain the following columns: * ``Feature``: The name of the feature * ``Feature Value``: The value of the feature for this particular input. @@ -140,18 +149,33 @@ def as_dataframe(self) -> Dict[str, pd.DataFrame]: * ``Confidence``: The confidence of this explanation as returned by the explainer. """ - df_dict = {} - for output_name, saliency in self.saliency_map().items(): - df_dict[output_name] = self._saliency_to_dataframe(saliency, output_name) - return df_dict + if output_name is None: + df_dict = {} + for output_name_key, saliency in self.saliency_map().items(): + df_dict[output_name_key] = self._saliency_to_dataframe( + saliency, output_name_key + ) + return df_dict + return self._saliency_to_dataframe( + self.saliency_map()[output_name], output_name + ) - def as_html(self) -> Dict[str, pd.io.formats.style.Styler]: + def as_html( + self, output_name: str = None + ) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]: """ Return the SHAP results as Pandas Styler objects. + Parameters + ---------- + output_name: str + If an output_name is passed, that output's explanation is returned as a pandas Styler. + Otherwise, all outputs' explanation stylers are returned in a dictionary. + + Returns ------- - Dict[str, pandas.Styler] + pandas.Styler or Dict[str, pandas.Styler] Dictionary of stylers keyed by output name. Each styler containing the results of the SHAP explanation for that particular output, in the same schema as in :func:`as_dataframe`. 
This will: @@ -174,31 +198,35 @@ def _color_feature_values(feature_values, background_vals): return [None] + formats df_dict = {} - for output_name, saliency in self.saliency_map().items(): - df = self._saliency_to_dataframe(saliency, output_name) - shap_values = df["SHAP Value"].values[1:] - background_mean_feature_values = df["Mean Background Value"].values[1:] - - style = df.style.background_gradient( - LinearSegmentedColormap.from_list( - name="rwg", - colors=[ - ds["negative_primary_colour"], - ds["neutral_primary_colour"], - ds["positive_primary_colour"], - ], - ), - subset=(slice(1, None), "SHAP Value"), - vmin=-1 * max(np.abs(shap_values)), - vmax=max(np.abs(shap_values)), - ) - style.set_caption(f"Explanation of {output_name}") - df_dict[output_name] = style.apply( - _color_feature_values, - background_vals=background_mean_feature_values, - subset="Value", - axis=0, - ) + for output_name_key, saliency in self.saliency_map().items(): + if output_name is None or output_name_key == output_name: + df = self._saliency_to_dataframe(saliency, output_name_key) + shap_values = df["SHAP Value"].values[1:] + background_mean_feature_values = df["Mean Background Value"].values[1:] + + style = df.style.background_gradient( + LinearSegmentedColormap.from_list( + name="rwg", + colors=[ + ds["negative_primary_colour"], + ds["neutral_primary_colour"], + ds["positive_primary_colour"], + ], + ), + subset=(slice(1, None), "SHAP Value"), + vmin=-1 * max(np.abs(shap_values)), + vmax=max(np.abs(shap_values)), + ) + style.set_caption(f"SHAP Explanation of {output_name_key}") + df_dict[output_name_key] = style.apply( + _color_feature_values, + background_vals=background_mean_feature_values, + subset="Value", + axis=0, + ) + + if output_name is not None: + return df_dict[output_name] return df_dict def _matplotlib_plot(self, output_name, block=True) -> None: diff --git a/tests/general/test_limeexplainer.py b/tests/general/test_limeexplainer.py index b2721be..7c72155 100644 --- 
a/tests/general/test_limeexplainer.py +++ b/tests/general/test_limeexplainer.py @@ -188,6 +188,7 @@ def test_lime_numpy(): for oname in onames: assert oname in explanation.as_dataframe().keys() + assert len(explanation.as_dataframe(oname)) == 5 for fname in fnames: assert fname in explanation.as_dataframe()[oname]['Feature'].values diff --git a/tests/general/test_shap.py b/tests/general/test_shap.py index adbda0a..8fdf779 100644 --- a/tests/general/test_shap.py +++ b/tests/general/test_shap.py @@ -131,5 +131,8 @@ def test_shap_numpy(): for oname in onames: assert oname in explanation.as_dataframe().keys() + assert len(explanation.as_dataframe(oname)) == 5 + 1 + for fname in fnames: assert fname in explanation.as_dataframe()[oname]['Feature'].values +