#!/usr/bin/env python
# coding: utf-8
# # Tutorial to invoke LIME explainers via aix360
#
# There are two ways to use [LIME](https://github.com/marcotcr/lime) explainers after installing aix360:
# - [Approach 1 (aix360 style)](#approach1): LIME explainers can be invoked in a manner similar to other explainer algorithms in aix360 via the implemented wrapper classes.
# - [Approach 2 (original style)](#approach2): Since LIME comes pre-installed in aix360, the explainers can simply be invoked directly.
#
# This notebook showcases both these approaches to invoke LIME. The notebook is based on the following example from the original LIME tutorial: https://marcotcr.github.io/lime/tutorials/Lime%20-%20multiclass.html
# ## Approach 1 (aix360 style)
#
# - Note the import statement related to LimeTextExplainer
# In[1]:
from __future__ import print_function
import sklearn
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.metrics
# Importing LimeTextExplainer (aix360 style)
from aix360.algorithms.lime import LimeTextExplainer
# In[2]:
# Suppress Jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### Fetching data, training a classifier
# In[3]:
from sklearn.datasets import fetch_20newsgroups

# Download the 20-newsgroups corpus, train and test splits.
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')


def _shorten(full_name):
    # Keep only the last dotted component of a newsgroup name, except for the
    # '*.misc' groups, which keep their last two components to stay unambiguous.
    pieces = full_name.split('.')
    return '.'.join(pieces[-2:]) if 'misc' in full_name else pieces[-1]


class_names = [_shorten(name) for name in newsgroups_train.target_names]
# The two 'hardware' groups would otherwise collide; name them explicitly.
class_names[3] = 'pc.hardware'
class_names[4] = 'mac.hardware'
# In[4]:
print(','.join(class_names))
# In[5]:
# `import sklearn` does not import the `sklearn.feature_extraction` submodule,
# so accessing it via attribute lookup is fragile; import the class explicitly.
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features; lowercasing is disabled to match the original LIME tutorial.
vectorizer = TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)
# In[6]:
from sklearn.naive_bayes import MultinomialNB
# Multinomial Naive Bayes with mild smoothing is a strong sparse-text baseline.
nb = MultinomialNB(alpha=.01)
nb.fit(train_vectors, newsgroups_train.target)
# In[7]:
# Weighted F1 on the held-out test split; print it so the score is visible when
# this converted notebook runs as a plain script (a bare expression shows nothing).
pred = nb.predict(test_vectors)
print(sklearn.metrics.f1_score(newsgroups_test.target, pred, average='weighted'))
# ### Explaining predictions using lime
# In[8]:
from sklearn.pipeline import make_pipeline

# Chain the vectorizer and the classifier so raw strings can be classified
# directly — LIME needs a predict_proba that accepts raw text.
c = make_pipeline(vectorizer, nb)
# In[9]:
print(c.predict_proba([newsgroups_test.data[0]]).round(3))
# In[10]:
# aix360-style wrapper around LIME's text explainer.
limeexplainer = LimeTextExplainer(class_names=class_names)
print(type(limeexplainer))
# In[11]:
idx = 1340
# aix360 style for explaining input instances
exp = limeexplainer.explain_instance(newsgroups_test.data[idx],
                                     c.predict_proba,
                                     num_features=6,
                                     labels=[0, 17])
# predict() returns a length-1 array; its single entry is the class index.
predicted_label = nb.predict(test_vectors[idx])[0]
print(f'Document id: {idx}')
print(f'Predicted class = {class_names[predicted_label]}')
print(f'True class: {class_names[newsgroups_test.target[idx]]}')
# In[12]:
print(f'Explanation for class {class_names[0]}')
print('\n'.join(str(pair) for pair in exp.as_list(label=0)))
print()
print(f'Explanation for class {class_names[17]}')
print('\n'.join(str(pair) for pair in exp.as_list(label=17)))
# In[13]:
# aix360 style for explaining input instances; this time let the explainer
# choose the two highest-probability labels itself.
exp = limeexplainer.explain_instance(newsgroups_test.data[idx],
                                     c.predict_proba,
                                     num_features=6,
                                     top_labels=2)
print(exp.available_labels())
# In[14]:
exp.show_in_notebook(text=False)
# In[15]:
exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(0,))
# ## Approach 2 (original style)
#
# - Note the import statement related to LimeTextExplainer
# In[16]:
# NOTE: the original cell repeated `from __future__ import print_function`
# here.  A `__future__` import is only legal at the very top of a module, so
# repeating it mid-file raises SyntaxError when this converted notebook runs
# as a plain script.  The import at the top of the file already applies.
import sklearn
import numpy as np
import sklearn.ensemble
import sklearn.metrics
# Importing LimeTextExplainer (original style)
from lime.lime_text import LimeTextExplainer
# In[17]:
# Suppress Jupyter warnings if required for cleaner output
import warnings
warnings.simplefilter('ignore')
# ### Fetching data, training a classifier
# In[18]:
from sklearn.datasets import fetch_20newsgroups

# Fetch the train/test splits of the 20-newsgroups corpus.
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# Abbreviate each newsgroup name: '*.misc' groups keep their final two dotted
# components, every other group keeps only the last one.
class_names = []
for full_name in newsgroups_train.target_names:
    pieces = full_name.split('.')
    if 'misc' in full_name:
        class_names.append('.'.join(pieces[-2:]))
    else:
        class_names.append(pieces[-1])
# The two ambiguous 'hardware' groups get explicit names.
class_names[3] = 'pc.hardware'
class_names[4] = 'mac.hardware'
# In[19]:
print(','.join(class_names))
# In[20]:
# `import sklearn` does not import the `sklearn.feature_extraction` submodule,
# so accessing it via attribute lookup is fragile; import the class explicitly.
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features; lowercasing is disabled to match the original LIME tutorial.
vectorizer = TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)
# In[21]:
from sklearn.naive_bayes import MultinomialNB
# Multinomial Naive Bayes with mild smoothing is a strong sparse-text baseline.
nb = MultinomialNB(alpha=.01)
nb.fit(train_vectors, newsgroups_train.target)
# In[22]:
# Weighted F1 on the held-out test split; print it so the score is visible when
# this converted notebook runs as a plain script (a bare expression shows nothing).
pred = nb.predict(test_vectors)
print(sklearn.metrics.f1_score(newsgroups_test.target, pred, average='weighted'))
# ### Explaining predictions using lime
# In[23]:
from sklearn.pipeline import make_pipeline

# A pipeline exposes predict_proba over raw strings, exactly what LIME needs.
c = make_pipeline(vectorizer, nb)
# In[24]:
probabilities = c.predict_proba([newsgroups_test.data[0]])
print(probabilities.round(3))
# In[25]:
# Instantiate LIME's text explainer directly (original style).
explainer = LimeTextExplainer(class_names=class_names)
print(type(explainer))
# In[26]:
idx = 1340
# LIME original style for explaining input instances
exp = explainer.explain_instance(
    newsgroups_test.data[idx],
    c.predict_proba,
    num_features=6,
    labels=[0, 17],
)
# predict() yields a length-1 array whose single entry is the class index.
predicted = nb.predict(test_vectors[idx])[0]
print('Document id: %d' % idx)
print('Predicted class =', class_names[predicted])
print('True class: %s' % class_names[newsgroups_test.target[idx]])
# In[27]:
# Print the per-word weights for the two requested labels, separated by a
# blank line (same layout as the original tutorial output).
for position, label in enumerate((0, 17)):
    if position:
        print()
    print('Explanation for class %s' % class_names[label])
    print('\n'.join(map(str, exp.as_list(label=label))))
# In[28]:
# LIME original style; ask for the two most probable labels instead of
# fixing them in advance.
exp = explainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6, top_labels=2)
print(exp.available_labels())
# In[29]:
exp.show_in_notebook(text=False)
# In[30]:
exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(0,))