#!/usr/bin/env python
# coding: utf-8

# # Tutorial to invoke LIME explainers via aix360
#
# There are two ways to use [LIME](https://github.com/marcotcr/lime) explainers after installing aix360:
# - [Approach 1 (aix360 style)](#approach1): LIME explainers can be invoked in a manner similar to other explainer algorithms in aix360 via the implemented wrapper classes.
# - [Approach 2 (original style)](#approach2): Since LIME comes pre-installed in aix360, the explainers can simply be invoked directly.
#
# This notebook showcases both these approaches to invoke LIME. The notebook is based on the following example from the original LIME tutorial: https://marcotcr.github.io/lime/tutorials/Lime%20-%20multiclass.html

# ## Approach 1 (aix360 style)
#
# - Note the import statement related to LimeTextExplainer

# In[1]:

# A future statement is only legal at the very top of a module, so this is the
# single occurrence for the whole file (it is deliberately not repeated in
# Approach 2 below).
from __future__ import print_function

import numpy as np
import sklearn
import sklearn.ensemble
# Imported explicitly because TfidfVectorizer is referenced below through the
# dotted path sklearn.feature_extraction.text.
import sklearn.feature_extraction.text
import sklearn.metrics

# Importing LimeTextExplainer (aix360 style)
from aix360.algorithms.lime import LimeTextExplainer


# In[2]:

# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')


# ### Fetching data, training a classifier

# In[3]:

from sklearn.datasets import fetch_20newsgroups
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# making class names shorter: keep only the last dotted component of each
# target name, or the last two components when the name contains 'misc'
class_names = [
    x.split('.')[-1] if 'misc' not in x else '.'.join(x.split('.')[-2:])
    for x in newsgroups_train.target_names
]
# Entries 3 and 4 are overridden by hand
# (presumably both would otherwise shorten to the same string - TODO confirm)
class_names[3] = 'pc.hardware'
class_names[4] = 'mac.hardware'


# In[4]:

print(','.join(class_names))


# In[5]:

# Fit the TF-IDF vocabulary on the training split only, then reuse it to
# transform the test split.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)


# In[6]:

from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB(alpha=.01)
nb.fit(train_vectors, newsgroups_train.target)


# In[7]:

pred = nb.predict(test_vectors)
# Printed explicitly so the score is also visible when this file is run as a
# plain script rather than as a notebook cell.
print(sklearn.metrics.f1_score(newsgroups_test.target, pred, average='weighted'))


# ### Explaining predictions using lime

# In[8]:

from sklearn.pipeline import make_pipeline
# Chain vectorizer and classifier so that c.predict_proba accepts raw text;
# c.predict_proba is what gets handed to explain_instance below.
c = make_pipeline(vectorizer, nb)


# In[9]:

print(c.predict_proba([newsgroups_test.data[0]]).round(3))


# In[10]:

limeexplainer = LimeTextExplainer(class_names=class_names)
print(type(limeexplainer))


# In[11]:

idx = 1340
# aix360 style for explaining input instances
exp = limeexplainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6, labels=[0, 17]
)
print('Document id: %d' % idx)
# nb.predict on a single row yields one label; take it to index class_names
print('Predicted class =', class_names[nb.predict(test_vectors[idx])[0]])
print('True class: %s' % class_names[newsgroups_test.target[idx]])


# In[12]:

print('Explanation for class %s' % class_names[0])
print('\n'.join(map(str, exp.as_list(label=0))))
print()
print('Explanation for class %s' % class_names[17])
print('\n'.join(map(str, exp.as_list(label=17))))


# In[13]:

# aix360 style for explaining input instances
exp = limeexplainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6, top_labels=2
)
print(exp.available_labels())


# In[14]:

exp.show_in_notebook(text=False)


# In[15]:

exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(0,))


# ## Approach 2 (original style)
#
# - Note the import statement related to LimeTextExplainer

# In[16]:

# The imports are repeated so this section reads on its own. The
# `from __future__ import print_function` line is intentionally NOT repeated:
# a future statement after other code is a SyntaxError, and the one in In[1]
# already applies to the whole file.
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.feature_extraction.text
import sklearn.metrics

# Importing LimeTextExplainer (original style)
# This rebinds the name LimeTextExplainer that Approach 1 imported from aix360.
from lime.lime_text import LimeTextExplainer


# In[17]:

# Suppress jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')


# ### Fetching data, training a classifier

# In[18]:

from sklearn.datasets import fetch_20newsgroups
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# making class names shorter: keep only the last dotted component of each
# target name, or the last two components when the name contains 'misc'
class_names = [
    x.split('.')[-1] if 'misc' not in x else '.'.join(x.split('.')[-2:])
    for x in newsgroups_train.target_names
]
# Entries 3 and 4 are overridden by hand
# (presumably both would otherwise shorten to the same string - TODO confirm)
class_names[3] = 'pc.hardware'
class_names[4] = 'mac.hardware'


# In[19]:

print(','.join(class_names))


# In[20]:

# Fit the TF-IDF vocabulary on the training split only, then reuse it to
# transform the test split.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)


# In[21]:

from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB(alpha=.01)
nb.fit(train_vectors, newsgroups_train.target)


# In[22]:

pred = nb.predict(test_vectors)
# Printed explicitly so the score is also visible when this file is run as a
# plain script rather than as a notebook cell.
print(sklearn.metrics.f1_score(newsgroups_test.target, pred, average='weighted'))


# ### Explaining predictions using lime

# In[23]:

from sklearn.pipeline import make_pipeline
# Chain vectorizer and classifier so that c.predict_proba accepts raw text;
# c.predict_proba is what gets handed to explain_instance below.
c = make_pipeline(vectorizer, nb)


# In[24]:

print(c.predict_proba([newsgroups_test.data[0]]).round(3))


# In[25]:

explainer = LimeTextExplainer(class_names=class_names)
print(type(explainer))


# In[26]:

idx = 1340
# LIME original style for explaining input instances
exp = explainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6, labels=[0, 17]
)
print('Document id: %d' % idx)
# nb.predict on a single row yields one label; take it to index class_names
print('Predicted class =', class_names[nb.predict(test_vectors[idx])[0]])
print('True class: %s' % class_names[newsgroups_test.target[idx]])


# In[27]:

print('Explanation for class %s' % class_names[0])
print('\n'.join(map(str, exp.as_list(label=0))))
print()
print('Explanation for class %s' % class_names[17])
print('\n'.join(map(str, exp.as_list(label=17))))


# In[28]:

# LIME original style for explaining input instances
exp = explainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6, top_labels=2
)
print(exp.available_labels())


# In[29]:

exp.show_in_notebook(text=False)


# In[30]:

exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(0,))