There are two ways to use SHAP explainers after installing aix360:
- aix360 style: SHAP explainers can be invoked in a manner similar to the other explainer algorithms in aix360, via the implemented wrapper classes.
- Original SHAP style: since SHAP is installed alongside aix360, its explainers can also be invoked directly.

This notebook showcases both of these approaches to invoke SHAP. The notebook is based on the following example from the original SHAP tutorial: https://slundberg.github.io/shap/notebooks/Iris%20classification%20with%20scikit-learn.html
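Condensed, the two styles differ only in the import path and in the method used to compute attributions. A minimal sketch (here model stands for any fitted scikit-learn classifier; the two imports are alternatives, not meant to be combined, and the full runnable versions of both variants follow below):

# aix360 style: wrapper class exposing explain_instance()
from aix360.algorithms.shap import KernelExplainer
shapexplainer = KernelExplainer(model.predict_proba, X_train)
shap_values = shapexplainer.explain_instance(X_test)

# Original SHAP style: the underlying class exposing shap_values()
from shap import KernelExplainer
explainer = KernelExplainer(model.predict_proba, X_train)
shap_values = explainer.shap_values(X_test)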
from __future__ import print_function
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import sklearn.neighbors
import numpy as np
import time
np.random.seed(1)
# Importing shap KernelExplainer (aix360 style)
from aix360.algorithms.shap import KernelExplainer
# the following import is required for access to shap plotting functions and datasets
import shap
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# Rather than use the whole training set to estimate expected values, we could summarize it
# with a set of weighted k-means centroids, each weighted by the number of points it represents.
# This dataset is so small that we don't bother here.
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    print("Accuracy = {0}%".format(100*np.sum(f(X_test) == Y_test)/len(Y_test)))
    time.sleep(0.5) # to let the print get out before any progress bars
# Load SHAP's JS visualization code into the notebook (required for force plots)
shap.initjs()
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
Accuracy = 96.66666666666667%
shapexplainer = KernelExplainer(knn.predict_proba, X_train)
print(type(shapexplainer))
Using 120 background data samples could cause slower run times. Consider using shap.kmeans(data, K) to summarize the background as K weighted samples.
<class 'aix360.algorithms.shap.shap_wrapper.KernelExplainer'>
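As the warning suggests, a larger background set can be summarized before constructing the explainer. A minimal sketch, mirroring the commented-out line in the setup cell above and assuming the wrapper forwards its arguments to shap.KernelExplainer (K=50 is illustrative):

# Summarize the background data as 50 weighted k-means centroids
X_train_summary = shap.kmeans(X_train, 50)
# Pass the summary instead of the full training set to speed up explanations
shapexplainer = KernelExplainer(knn.predict_proba, X_train_summary)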
# aix360 style for explaining a single input instance
shap_values = shapexplainer.explain_instance(X_test.iloc[0,:])
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# aix360 style for explaining multiple input instances at once
shap_values = shapexplainer.explain_instance(X_test)
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test)
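The returned attributions work with the other shap plotting utilities as well. For instance, a summary plot aggregates per-feature attributions across the whole test set (a sketch; index 0 selects the first class, as in the force plots above):

# Beeswarm-style overview of feature attributions for class 0 across X_test
shap.summary_plot(shap_values[0], X_test)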
from __future__ import print_function
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import sklearn.neighbors
import numpy as np
import time
np.random.seed(1)
# Importing shap KernelExplainer (original style)
import shap
from shap import KernelExplainer
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# Rather than use the whole training set to estimate expected values, we could summarize it
# with a set of weighted k-means centroids, each weighted by the number of points it represents.
# This dataset is so small that we don't bother here.
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    print("Accuracy = {0}%".format(100*np.sum(f(X_test) == Y_test)/len(Y_test)))
    time.sleep(0.5) # to let the print get out before any progress bars
# Load SHAP's JS visualization code into the notebook (required for force plots)
shap.initjs()
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
Accuracy = 96.66666666666667%
explainer = KernelExplainer(knn.predict_proba, X_train)
print(type(explainer))
Using 120 background data samples could cause slower run times. Consider using shap.kmeans(data, K) to summarize the background as K weighted samples.
<class 'shap.explainers.kernel.KernelExplainer'>
# SHAP original style for explaining a single input instance
shap_values = explainer.shap_values(X_test.iloc[0,:])
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# SHAP original style for explaining multiple input instances at once
shap_values = explainer.shap_values(X_test)
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test)
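For a three-class problem such as iris, both shap_values and expected_value carry one entry per class, and the plots above all index class 0. A sketch of explaining a different class (class 2 is virginica in the usual iris encoding):

# Force plot for class 2 instead of class 0
shap.force_plot(explainer.expected_value[2], shap_values[2], X_test)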