import pandas as pd
import numpy as np
import text_normalizer as tn
# Load the movie-review dataset and preview its first rows.
dataset = pd.read_csv(r'movie_reviews.csv')
print(dataset.head())

# Pull the text and label columns out as NumPy arrays.
reviews, sentiments = (np.array(dataset[column]) for column in ('review', 'sentiment'))

# Positional hold-out split: the first 35,000 rows train, the remainder test.
train_reviews, test_reviews = reviews[:35000], reviews[35000:]
train_sentiments, test_sentiments = sentiments[:35000], sentiments[35000:]

# Run the same normalization over both splits (train first, then test).
norm_train_reviews, norm_test_reviews = (
    tn.normalize_corpus(split) for split in (train_reviews, test_reviews)
)
                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
import warnings
# Silence every warning from this point on.
warnings.filterwarnings("ignore")

# Bag-of-words vectorizer over unigrams and bigrams; raw counts are kept
# (binary=False) and min_df=0.0 / max_df=1.0 leave the vocabulary unpruned.
cv = CountVectorizer(ngram_range=(1, 2), min_df=0.0, max_df=1.0, binary=False)
cv_train_features = cv.fit_transform(norm_train_reviews)

# Fit a logistic regression classifier on the training features.
lr = LogisticRegression()
lr.fit(cv_train_features, train_sentiments)

# Chain the already-fitted vectorizer and classifier so raw text can be
# scored in a single call.
lr_pipeline = make_pipeline(cv, lr)

# Class labels, in the order the pipeline reports them.
classes = list(lr_pipeline.classes_)

# Try the pipeline on two hand-written reviews.
lr_pipeline.predict([
    'the lord of the rings is an excellent movie',
    'i hated the recent movie on tv, it was so bad',
])
array(['positive', 'negative'], dtype=object)
# Tabulate the per-class probabilities for two sample reviews, one column
# per class label.
sample_probabilities = lr_pipeline.predict_proba([
    'the lord of the rings is an excellent movie',
    'i hated the recent movie on tv, it was so bad',
])
pd.DataFrame(sample_probabilities, columns=classes)
| | negative | positive |
|---|---|---|
| 0 | 0.169653 | 0.830347 |
| 1 | 0.730814 | 0.269186 |
from skater.core.local_interpretation.lime.lime_text import LimeTextExplainer
# LIME text explainer labelled with the class names stored in `classes`.
explainer = LimeTextExplainer(class_names=classes)
def interpret_classification_model_prediction(doc_index, norm_corpus, corpus,
                                              prediction_labels, explainer_obj,
                                              pipeline=None, class_names=None):
    """Print one document's prediction details and render its LIME explanation.

    Args:
        doc_index: Position of the document in ``norm_corpus``, ``corpus`` and
            ``prediction_labels``.
        norm_corpus: Normalized documents; the entry at ``doc_index`` is the
            text that gets classified and explained.
        corpus: Original documents, used only to print the review text.
        prediction_labels: Labels indexed like the corpora; the entry at
            ``doc_index`` is printed as the actual sentiment.
        explainer_obj: Object exposing ``explain_instance(text, predict_fn,
            num_features=..., labels=...)`` whose result provides
            ``show_in_notebook()``.
        pipeline: Classifier exposing ``predict`` and ``predict_proba`` on a
            list of texts. Defaults to the module-level ``lr_pipeline``.
        class_names: Class labels paired positionally with the
            ``predict_proba`` columns. Defaults to the module-level
            ``classes``.

    Returns:
        None. Everything is printed or rendered as a side effect.
    """
    # Fall back to the module-level model objects so existing callers that
    # pass only the original five arguments behave exactly as before.
    if pipeline is None:
        pipeline = lr_pipeline
    if class_names is None:
        class_names = classes

    document = norm_corpus[doc_index]

    # display model prediction and actual sentiments
    print("Test document index: {index}\nActual sentiment: {actual}\nPredicted sentiment: {predicted}"
          .format(index=doc_index, actual=prediction_labels[doc_index],
                  predicted=pipeline.predict([document])))

    # display actual review content
    print("\nReview:", corpus[doc_index])

    # display prediction probabilities
    print("\nModel Prediction Probabilities:")
    for probs in zip(class_names, pipeline.predict_proba([document])[0]):
        print(probs)

    # display model prediction interpretation; use the explainer supplied by
    # the caller rather than a module-level global. labels=[1] restricts the
    # explanation to class index 1.
    exp = explainer_obj.explain_instance(document,
                                         pipeline.predict_proba,
                                         num_features=10,
                                         labels=[1])
    exp.show_in_notebook()
# Inspect the model's prediction for test review 100.
doc_index = 100
interpret_classification_model_prediction(
    doc_index=doc_index,
    norm_corpus=norm_test_reviews,
    corpus=test_reviews,
    prediction_labels=test_sentiments,
    explainer_obj=explainer,
)
Test document index: 100
Actual sentiment: negative
Predicted sentiment: ['negative']

Review: Worst movie, (with the best reviews given it) I've ever seen. Over the top dialog, acting, and direction. more slasher flick than thriller.With all the great reviews this movie got I'm appalled that it turned out so silly. shame on you martin scorsese

Model Prediction Probabilities:
('negative', 0.8099323456145181)
('positive', 0.19006765438548187)
# Inspect the model's prediction for test review 2000.
doc_index = 2000
interpret_classification_model_prediction(
    doc_index=doc_index,
    norm_corpus=norm_test_reviews,
    corpus=test_reviews,
    prediction_labels=test_sentiments,
    explainer_obj=explainer,
)
Test document index: 2000
Actual sentiment: positive
Predicted sentiment: ['positive']

Review: I really liked the Movie "JOE." It has really become a cult classic among certain age groups.<br /><br />The Producer of this movie is a personal friend of mine. He is my Stepsons Father-In-Law. He lives in Manhattan's West side, and has a Bungalow. in Southampton, Long Island. His son-in-law live next door to his Bungalow.<br /><br />Presently, he does not do any Producing, But dabbles in a business with HBO movies.<br /><br />As a person, Mr. Gil is a real gentleman and I wish he would have continued in the production business of move making.

Model Prediction Probabilities:
('negative', 0.020629181561415355)
('positive', 0.97937081843858464)
# Inspect the model's prediction for test review 347.
doc_index = 347
interpret_classification_model_prediction(
    doc_index=doc_index,
    norm_corpus=norm_test_reviews,
    corpus=test_reviews,
    prediction_labels=test_sentiments,
    explainer_obj=explainer,
)
Test document index: 347
Actual sentiment: negative
Predicted sentiment: ['positive']

Review: When I first saw this film in cinema 11 years ago, I loved it. I still think the directing and cinematography are excellent, as is the music. But it's really the script that has over the time started to bother me more and more. I find Emma Thompson's writing self-absorbed and unfaithful to the original book; she has reduced Marianne to a side-character, a second fiddle to her much too old, much too severe Elinor - she in the movie is given many sort of 'focus moments', and often they appear to be there just to show off Thompson herself.<br /><br />I do understand her cutting off several characters from the book, but leaving out the one scene where Willoughby in the book is redeemed? For someone who red and cherished the book long before the movie, those are the things always difficult to digest.<br /><br />As for the actors, I love Kate Winslet as Marianne. She is not given the best script in the world to work with but she still pulls it up gracefully, without too much sentimentality. Alan Rickman is great, a bit old perhaps, but he plays the role beautifully. And Elizabeth Spriggs, she is absolutely fantastic as always.

Model Prediction Probabilities:
('negative', 0.067198213044844413)
('positive', 0.93280178695515559)