#!/usr/bin/env python
# coding: utf-8
# # Tutorial to invoke SHAP explainers via aix360
#
# There are two ways to use [SHAP](https://github.com/slundberg/shap) explainers after installing aix360:
# - [Approach 1 (aix360 style)](#approach1): SHAP explainers can be invoked in a manner similar to other explainer algorithms in aix360 via the implemented wrapper classes.
# - [Approach 2 (original style)](#approach2): Since SHAP comes pre-installed in aix360, the explainers can simply be invoked directly.
#
# This notebook showcases both these approaches to invoke SHAP. The notebook is based on the following example from the original SHAP tutorial:
# https://slundberg.github.io/shap/notebooks/Iris%20classification%20with%20scikit-learn.html
#
# ## Approach 1 (aix360 style)
#
# - Note the import statement related to KernelExplainer
# In[1]:
from __future__ import print_function
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import time
# Fix the NumPy RNG seed so KernelExplainer's background sampling is reproducible.
np.random.seed(1)
# Importing shap KernelExplainer (aix360 style)
from aix360.algorithms.shap import KernelExplainer
# the following import is required for access to shap plotting functions and datasets
import shap
# In[2]:
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### K-nearest neighbors
# In[3]:
# Load the iris dataset and hold out 20% as a test split; random_state is fixed
# so both approaches in this tutorial see the same partition.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# rather than use the whole training set to estimate expected values, we could summarize with
# a set of weighted kmeans, each weighted by the number of points they represent. But this dataset
# is so small we don't worry about it
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    """Print the accuracy (in percent) of prediction callable *f* on the test split."""
    n_correct = np.sum(f(X_test) == Y_test)
    print("Accuracy = {0}%".format(100 * n_correct / len(Y_test)))
    # Pause briefly so the printed line lands before shap's progress bars.
    time.sleep(0.5)
# Inject the JavaScript needed to render shap force plots in the notebook.
shap.initjs()
# Explicit submodule import: `import sklearn` alone does not guarantee that
# sklearn.neighbors is loaded, so the attribute access below could raise
# AttributeError without it.
import sklearn.neighbors
# Train a K-nearest-neighbors classifier on the iris training split and
# report its held-out accuracy.
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
# ### Explain a single prediction from the test set
# In[4]:
# Wrap the KNN's probability predictor in the aix360 KernelExplainer; X_train
# serves as the background data used to estimate expected values.
shapexplainer = KernelExplainer(knn.predict_proba, X_train)
print(type(shapexplainer))
# In[5]:
# aix360 style for explaining input instances
shap_values = shapexplainer.explain_instance(X_test.iloc[0,:])
# In[6]:
# Force plot for class index 0 of the first test instance; the underlying shap
# explainer object is reached via the wrapper's .explainer attribute.
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# ### Explain all the predictions in the test set
# In[7]:
# aix360 style for explaining input instances (whole test set at once)
shap_values = shapexplainer.explain_instance(X_test)
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test)
# ## Approach 2 (original style)
#
# - Note the last import statement related to KernelExplainer
# In[8]:
# NOTE: the original notebook cell repeated `from __future__ import print_function`
# here. A __future__ import is only legal at the very top of a module, so the
# duplicate (already in effect from the first cell) is removed to keep this
# file importable as a plain Python script.
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import time
# Re-seed for reproducibility of the second (original-style) walkthrough.
np.random.seed(1)
# Importing shap KernelExplainer (original style)
import shap
from shap import KernelExplainer
# In[9]:
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### K-nearest neighbors
# In[10]:
# Same 80/20 iris split as Approach 1 (identical random_state), so results
# between the two approaches are directly comparable.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# rather than use the whole training set to estimate expected values, we could summarize with
# a set of weighted kmeans, each weighted by the number of points they represent. But this dataset
# is so small we don't worry about it
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    """Report the accuracy of prediction callable *f* on the held-out test set."""
    pct = 100 * np.sum(f(X_test) == Y_test) / len(Y_test)
    print("Accuracy = {0}%".format(pct))
    time.sleep(0.5)  # let the print flush before any progress bars appear
# Inject the JavaScript needed to render shap force plots in the notebook.
shap.initjs()
# Explicit submodule import: `import sklearn` alone does not guarantee that
# sklearn.neighbors is loaded, so the attribute access below could raise
# AttributeError without it.
import sklearn.neighbors
# Train the same KNN classifier as in Approach 1 and report its accuracy.
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
# ### Explain a single prediction from the test set
# In[11]:
# Build the explainer directly from the shap package (original style); X_train
# is the background dataset used to integrate out missing features.
explainer = KernelExplainer(knn.predict_proba, X_train)
print(type(explainer))
# In[12]:
# Shap original style for explaining input instances
shap_values = explainer.shap_values(X_test.iloc[0,:])
# Force plot for class index 0 of the first test instance.
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# ### Explain all the predictions in the test set
# In[13]:
# Shap original style for explaining input instances (whole test set at once)
shap_values = explainer.shap_values(X_test)
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test)