Skip to content

FAI-906b: Added argument to as_html and as_df to allow for single output selection #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/trustyai/explainers/counterfactuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import uuid as _uuid

from trustyai import _default_initializer # pylint: disable=unused-import
from .explanation_results import ExplanationResults
from trustyai.utils._visualisation import (
DEFAULT_STYLE as ds,
DEFAULT_RC_PARAMS as drcp,
Expand All @@ -20,6 +19,8 @@
Model,
)

from .explanation_results import ExplanationResults


from trustyai.utils.data_conversions import (
prediction_object_to_numpy,
Expand Down
17 changes: 14 additions & 3 deletions src/trustyai/explainers/explanation_results.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
"""Generic class for Explanation and Saliency results"""
from abc import ABC, abstractmethod
from typing import Dict
from typing import Dict, Union

import bokeh.models
import pandas as pd
from bokeh.io import show
from pandas.io.formats.style import Styler


# pylint: disable=too-few-public-methods
class ExplanationResults(ABC):
"""Abstract class for explanation visualisers"""
"""Abstract class for non-saliency visualisers"""

@abstractmethod
def as_dataframe(self) -> pd.DataFrame:
Expand All @@ -21,9 +22,19 @@ def as_html(self) -> Styler:


# pylint: disable=too-few-public-methods
class SaliencyResults(ExplanationResults):
class SaliencyResults(ABC):
"""Abstract class for saliency visualisers"""

@abstractmethod
def as_dataframe(
self, output_name=None
) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
"""Display explanation result as a dataframe"""

@abstractmethod
def as_html(self, output_name=None) -> Union[Dict[str, Styler], Styler]:
"""Visualise the styled dataframe"""

@abstractmethod
def saliency_map(self):
"""Return the Saliencies as a dictionary, keyed by output name"""
Expand Down
87 changes: 58 additions & 29 deletions src/trustyai/explainers/lime.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,46 +72,69 @@ def saliency_map(self) -> Dict[str, Saliency]:
for entry in self._java_saliency_results.saliencies.entrySet()
}

def as_dataframe(self) -> pd.DataFrame:
def as_dataframe(
self, output_name: str = None
) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
"""
Return the LIME result as a dataframe.

Parameters
----------
output_name: str
If an output_name is passed, that output's explanation is returned as a pandas
dataframe. Otherwise, all outputs' explanation dataframes are returned in a dictionary.


Returns
-------
pandas.DataFrame
pandas.DataFrame or Dict[str, pandas.DataFrame]
Dictionary of DataFrames, keyed by output name, containing the results of the LIME
explanation. For each model output, the table will contain the following columns:
explanation. Each dataframe will contain the following columns:

* ``Feature``: The name of the feature
* ``Value``: The value of the feature for this particular input.
* ``Saliency``: The importance of this feature to the output.
* ``Confidence``: The confidence of this explanation as returned by the explainer.

"""

outputs = self.saliency_map().keys()

data = {}
for output in outputs:
output_rows = []
for pfi in self.saliency_map().get(output).getPerFeatureImportance():
output_rows.append(
{
"Feature": str(pfi.getFeature().getName().toString()),
"Value": pfi.getFeature().getValue().getUnderlyingObject(),
"Saliency": pfi.getScore(),
"Confidence": pfi.getConfidence(),
}
)
data[output] = pd.DataFrame(output_rows)
if output_name is None or output == output_name:
output_rows = []
for pfi in self.saliency_map().get(output).getPerFeatureImportance():
output_rows.append(
{
"Feature": str(pfi.getFeature().getName().toString()),
"Value": pfi.getFeature().getValue().getUnderlyingObject(),
"Saliency": pfi.getScore(),
"Confidence": pfi.getConfidence(),
}
)
data[output] = pd.DataFrame(output_rows)

if output_name is not None:
return data[output_name]
return data

def as_html(self) -> pd.io.formats.style.Styler:
def as_html(
self, output_name: str = None
) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]:
"""
Return the LIME results as Pandas Styler objects.

Parameters
----------
output_name: str
If an output_name is passed, that output's explanation is returned as a pandas Styler.
Otherwise, all outputs' explanation stylers are returned in a dictionary.


Returns
-------
Dict[str, pandas.Styler]
pandas.Styler or Dict[str, pandas.Styler]
Dictionary of stylers keyed by output name. Each styler contains the results of the
LIME explanation for that particular output, in the same
schema as in :func:`as_dataframe`. This will:
Expand All @@ -121,19 +144,25 @@ def as_html(self) -> pd.io.formats.style.Styler:

htmls = {}
for k, df in self.as_dataframe().items():
htmls[k] = df.style.background_gradient(
LinearSegmentedColormap.from_list(
name="rwg",
colors=[
ds["negative_primary_colour"],
ds["neutral_primary_colour"],
ds["positive_primary_colour"],
],
),
subset="Saliency",
vmin=-1 * max(np.abs(df["Saliency"])),
vmax=max(np.abs(df["Saliency"])),
)
if output_name is None or k == output_name:
style = df.style.background_gradient(
LinearSegmentedColormap.from_list(
name="rwg",
colors=[
ds["negative_primary_colour"],
ds["neutral_primary_colour"],
ds["positive_primary_colour"],
],
),
subset="Saliency",
vmin=-1 * max(np.abs(df["Saliency"])),
vmax=max(np.abs(df["Saliency"])),
)
style.set_caption(f"LIME Explanation of {output_name}")
htmls[k] = style

if output_name is not None:
return htmls[output_name]
return htmls

def _matplotlib_plot(self, output_name: str, block=True) -> None:
Expand Down
98 changes: 63 additions & 35 deletions src/trustyai/explainers/shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,24 @@ def _saliency_to_dataframe(self, saliency, output_name):

return pd.DataFrame([fnull] + data_rows)

def as_dataframe(self) -> Dict[str, pd.DataFrame]:
def as_dataframe(
self, output_name: str = None
) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
"""
Return the SHAP results as dataframes.

Parameters
----------
output_name: str
If an output_name is passed, that output's explanation is returned as a dataframe.
Otherwise, all outputs' explanation dataframes are returned in a dictionary.

Returns
-------
Dict[str, pandas.DataFrame]
Dictionary of DataFrames, keyed by output name, containing the results of the SHAP
explanation. For each model output, the table will contain the following columns:
pandas.DataFrame or Dict[str, pandas.DataFrame]
A dataframe or dictionary of DataFrames, keyed by output name. Each dataframe
contains the results of the SHAP explanation for a particular output. Each dataframe
will contain the following columns:

* ``Feature``: The name of the feature
* ``Feature Value``: The value of the feature for this particular input.
Expand All @@ -140,18 +149,33 @@ def as_dataframe(self) -> Dict[str, pd.DataFrame]:
* ``Confidence``: The confidence of this explanation as returned by the explainer.

"""
df_dict = {}
for output_name, saliency in self.saliency_map().items():
df_dict[output_name] = self._saliency_to_dataframe(saliency, output_name)
return df_dict
if output_name is None:
df_dict = {}
for output_name_key, saliency in self.saliency_map().items():
df_dict[output_name_key] = self._saliency_to_dataframe(
saliency, output_name_key
)
return df_dict
return self._saliency_to_dataframe(
self.saliency_map()[output_name], output_name
)

def as_html(self) -> Dict[str, pd.io.formats.style.Styler]:
def as_html(
self, output_name: str = None
) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]:
"""
Return the SHAP results as Pandas Styler objects.

Parameters
----------
output_name: str
If an output_name is passed, that output's explanation is returned as a pandas Styler.
Otherwise, all outputs' explanation stylers are returned in a dictionary.


Returns
-------
Dict[str, pandas.Styler]
Pandas Styler or Dict[str, pandas.Styler]
Dictionary of stylers keyed by output name. Each styler contains the results of the
SHAP explanation for that particular output, in the same
schema as in :func:`as_dataframe`. This will:
Expand All @@ -174,31 +198,35 @@ def _color_feature_values(feature_values, background_vals):
return [None] + formats

df_dict = {}
for output_name, saliency in self.saliency_map().items():
df = self._saliency_to_dataframe(saliency, output_name)
shap_values = df["SHAP Value"].values[1:]
background_mean_feature_values = df["Mean Background Value"].values[1:]

style = df.style.background_gradient(
LinearSegmentedColormap.from_list(
name="rwg",
colors=[
ds["negative_primary_colour"],
ds["neutral_primary_colour"],
ds["positive_primary_colour"],
],
),
subset=(slice(1, None), "SHAP Value"),
vmin=-1 * max(np.abs(shap_values)),
vmax=max(np.abs(shap_values)),
)
style.set_caption(f"Explanation of {output_name}")
df_dict[output_name] = style.apply(
_color_feature_values,
background_vals=background_mean_feature_values,
subset="Value",
axis=0,
)
for output_name_key, saliency in self.saliency_map().items():
if output_name is None or output_name_key == output_name:
df = self._saliency_to_dataframe(saliency, output_name_key)
shap_values = df["SHAP Value"].values[1:]
background_mean_feature_values = df["Mean Background Value"].values[1:]

style = df.style.background_gradient(
LinearSegmentedColormap.from_list(
name="rwg",
colors=[
ds["negative_primary_colour"],
ds["neutral_primary_colour"],
ds["positive_primary_colour"],
],
),
subset=(slice(1, None), "SHAP Value"),
vmin=-1 * max(np.abs(shap_values)),
vmax=max(np.abs(shap_values)),
)
style.set_caption(f"SHAP Explanation of {output_name_key}")
df_dict[output_name_key] = style.apply(
_color_feature_values,
background_vals=background_mean_feature_values,
subset="Value",
axis=0,
)

if output_name is not None:
return df_dict[output_name]
return df_dict

def _matplotlib_plot(self, output_name, block=True) -> None:
Expand Down
1 change: 1 addition & 0 deletions tests/general/test_limeexplainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def test_lime_numpy():

for oname in onames:
assert oname in explanation.as_dataframe().keys()
assert len(explanation.as_dataframe(oname)) == 5
for fname in fnames:
assert fname in explanation.as_dataframe()[oname]['Feature'].values

3 changes: 3 additions & 0 deletions tests/general/test_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,8 @@ def test_shap_numpy():

for oname in onames:
assert oname in explanation.as_dataframe().keys()
assert len(explanation.as_dataframe(oname)) == 5 + 1

for fname in fnames:
assert fname in explanation.as_dataframe()[oname]['Feature'].values