#!/usr/bin/env python
# coding: utf-8
# # Tutorial to invoke SHAP explainers via aix360
#
# There are two ways to use [SHAP](https://github.com/slundberg/shap) explainers after installing aix360:
# - [Approach 1 (aix360 style)](#approach1): SHAP explainers can be invoked in a manner similar to other explainer algorithms in aix360 via the implemented wrapper classes.
# - [Approach 2 (original style)](#approach2): Since SHAP comes pre-installed in aix360, the explainers can simply be invoked directly.
#
# This notebook showcases both these approaches to invoke SHAP. The notebook is based on the following example from the original SHAP tutorial:
# https://slundberg.github.io/shap/notebooks/Iris%20classification%20with%20scikit-learn.html
#
# ## Approach 1 (aix360 style)
#
# - Note the import statement related to KernelExplainer
# In[1]:
from __future__ import print_function
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import time
# Fix the NumPy RNG seed so KernelExplainer's background sampling is reproducible.
np.random.seed(1)
# Importing shap KernelExplainer (aix360 style)
from aix360.algorithms.shap import KernelExplainer
# the following import is required for access to shap plotting functions and datasets
import shap
# In[2]:
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### K-nearest neighbors
# In[3]:
# Load the iris dataset and hold out 20% as a test split; random_state is fixed
# so both approaches in this tutorial see the same partition.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# rather than use the whole training set to estimate expected values, we could summarize with
# a set of weighted kmeans, each weighted by the number of points they represent. But this dataset
# is so small we don't worry about it
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    """Print the accuracy (in percent) of prediction callable *f* on the test split."""
    n_correct = np.sum(f(X_test) == Y_test)
    print("Accuracy = {0}%".format(100 * n_correct / len(Y_test)))
    # Pause briefly so the printed line lands before shap's progress bars.
    time.sleep(0.5)
# Inject the JavaScript needed to render shap force plots in the notebook.
shap.initjs()
# Explicit submodule import: `import sklearn` alone does not guarantee that
# sklearn.neighbors is loaded, so the attribute access below could raise
# AttributeError without it.
import sklearn.neighbors
# Train a K-nearest-neighbors classifier on the iris training split and
# report its held-out accuracy.
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
# ### Explain a single prediction from the test set
# In[4]:
# Wrap the KNN's probability predictor in the aix360 KernelExplainer; X_train
# serves as the background data used to estimate expected values.
shapexplainer = KernelExplainer(knn.predict_proba, X_train)
print(type(shapexplainer))
# In[5]:
# aix360 style for explaining input instances
shap_values = shapexplainer.explain_instance(X_test.iloc[0,:])
# In[6]:
# Force plot for class index 0 of the first test instance; the underlying shap
# explainer object is reached via the wrapper's .explainer attribute.
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# ### Explain all the predictions in the test set
# In[7]:
# aix360 style for explaining input instances (whole test set at once)
shap_values = shapexplainer.explain_instance(X_test)
shap.force_plot(shapexplainer.explainer.expected_value[0], shap_values[0], X_test)
# ## Approach 2 (original style)
#
# - Note the last import statement related to KernelExplainer
# In[8]:
# NOTE: the original notebook cell repeated `from __future__ import print_function`
# here. A __future__ import is only legal at the very top of a module, so the
# duplicate (already in effect from the first cell) is removed to keep this
# file importable as a plain Python script.
import sklearn
from sklearn.model_selection import train_test_split
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import time
# Re-seed for reproducibility of the second (original-style) walkthrough.
np.random.seed(1)
# Importing shap KernelExplainer (original style)
import shap
from shap import KernelExplainer
# In[9]:
# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### K-nearest neighbors
# In[10]:
# Same 80/20 iris split as Approach 1 (identical random_state), so results
# between the two approaches are directly comparable.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# rather than use the whole training set to estimate expected values, we could summarize with
# a set of weighted kmeans, each weighted by the number of points they represent. But this dataset
# is so small we don't worry about it
#X_train_summary = shap.kmeans(X_train, 50)
def print_accuracy(f):
    """Report the accuracy of prediction callable *f* on the held-out test set."""
    pct = 100 * np.sum(f(X_test) == Y_test) / len(Y_test)
    print("Accuracy = {0}%".format(pct))
    time.sleep(0.5)  # let the print flush before any progress bars appear
# Inject the JavaScript needed to render shap force plots in the notebook.
shap.initjs()
# Explicit submodule import: `import sklearn` alone does not guarantee that
# sklearn.neighbors is loaded, so the attribute access below could raise
# AttributeError without it.
import sklearn.neighbors
# Train the same KNN classifier as in Approach 1 and report its accuracy.
knn = sklearn.neighbors.KNeighborsClassifier()
knn.fit(X_train, Y_train)
print_accuracy(knn.predict)
# ### Explain a single prediction from the test set
# In[11]:
# Build the explainer directly from the shap package (original style); X_train
# is the background dataset used to integrate out missing features.
explainer = KernelExplainer(knn.predict_proba, X_train)
print(type(explainer))
# In[12]:
# Shap original style for explaining input instances
shap_values = explainer.shap_values(X_test.iloc[0,:])
# Force plot for class index 0 of the first test instance.
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test.iloc[0,:])
# ### Explain all the predictions in the test set
# In[13]:
# Shap original style for explaining input instances (whole test set at once)
shap_values = explainer.shap_values(X_test)
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test)