Load Classfier¶

In [1]:

# Importing the libraries
import numpy as np
import re
import pickle 
import nltk
from nltk.corpus import stopwords
from sklearn.datasets import load_files
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/uzaycetin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!

Out[1]:

True

In [3]:

# Using our classifier
with open('pre-trained-model/tfidfmodel.pickle','rb') as f:
    tfidf = pickle.load(f)
    
with open('pre-trained-model/classifier.pickle','rb') as f:
    clf = pickle.load(f)
    

Test on a new Data¶

In [4]:

sample = ["You are a nice person man, have a good life"]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

Out[4]:

array([1])

In [5]:

sample = ["Logistic regression is not good! "]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

Out[5]:

array([1])

Test on Twitter¶

In [6]:

import json
import tweepy
import time
from tweepy import OAuthHandler


consumer_key = 'yoIwFkjZGYDa49aO16XqSNqcN'
consumer_secret = 'gl4LQOItV7Z1aFwNrlvaiKJ3t8o8h99blMIAmnmdHxYjzjRAxO' 
access_token = '624310916-E7fDF2IE8P6bfY1oVFglASf6F8RnxMd3vgSXFqnZ'
access_token_secret ='ID9JcoXHsDcKtvNcnmBGcCQhUlO0wmwAxBJ6LCesiUAas'
 
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
 
#api = tweepy.API(auth)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, retry_count=3, retry_delay=60)

In [7]:

# Creates the user object. The me() method returns the user whose authentication keys were used.
user = api.me()
 
print('Name: ' + user.name)
print('ID: ' + str(user.id))
print('Location: ' + user.location)

Name: Bijoyan Das
ID: 624310916
Location: Calcutta, India

In [8]:

args = ['Khashoggi'];
api = tweepy.API(auth,timeout=10)

# Fetching the tweets
list_tweets = []

query = args[0]
if len(args) == 1:
    for status in tweepy.Cursor(api.search,q=query+" -filter:retweets",lang='en',result_type='recent',geocode="22.1568,89.4332,500km").items(100):
        list_tweets.append(status.text)

In [9]:

list_tweets[:4]

Out[9]:

['Khashoggi killers ‘will be prosecuted in Saudi Arabia’: Saudi FM  https://t.co/guT8LwGo3T',
 'Mattis: Khashoggi murder undermines Middle East stability https://t.co/m8AilpBnr7',
 "Saudi Arabia says it is beacon of 'light' against Iran despite Khashoggi crisis https://t.co/TJD7WLOx4t",
 'French President gives a shut up call to his European allies? Interesting read in Real Politick, CSS aspirants must… https://t.co/KZcU8eIx7f']

In [10]:

mapping = {0:'positive', 1:'negative'}

Twitter Data Claening¶

In [11]:

# Preprocessing the tweets

sent_tweets = []
for tweet in list_tweets:
    tweet = re.sub(r"^https://t.co/[a-zA-Z0-9]*\s", " ", tweet)
    tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*\s", " ", tweet)
    tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*$", " ", tweet)
    tweet = tweet.lower()
    tweet = re.sub(r"that's","that is",tweet)
    tweet = re.sub(r"there's","there is",tweet)
    tweet = re.sub(r"what's","what is",tweet)
    tweet = re.sub(r"where's","where is",tweet)
    tweet = re.sub(r"it's","it is",tweet)
    tweet = re.sub(r"who's","who is",tweet)
    tweet = re.sub(r"i'm","i am",tweet)
    tweet = re.sub(r"she's","she is",tweet)
    tweet = re.sub(r"he's","he is",tweet)
    tweet = re.sub(r"they're","they are",tweet)
    tweet = re.sub(r"who're","who are",tweet)
    tweet = re.sub(r"ain't","am not",tweet)
    tweet = re.sub(r"wouldn't","would not",tweet)
    tweet = re.sub(r"shouldn't","should not",tweet)
    tweet = re.sub(r"can't","can not",tweet)
    tweet = re.sub(r"couldn't","could not",tweet)
    tweet = re.sub(r"won't","will not",tweet)
    tweet = re.sub(r"\W"," ",tweet)
    tweet = re.sub(r"\d"," ",tweet)
    tweet = re.sub(r"\s+[a-z]\s+"," ",tweet)
    tweet = re.sub(r"\s+[a-z]$"," ",tweet)
    tweet = re.sub(r"^[a-z]\s+"," ",tweet)
    tweet = re.sub(r"\s+"," ",tweet)
    
    sent = clf.predict(tfidf.transform([tweet]).toarray())
    sent_tweets.append((tweet, mapping[int(sent)]))

sent_tweets[:4]

Out[11]:

[('khashoggi killers will be prosecuted in saudi arabia saudi fm ',
  'negative'),
 ('mattis khashoggi murder undermines middle east stability ', 'positive'),
 ('saudi arabia says it is beacon of light against iran despite khashoggi crisis ',
  'negative'),
 ('french president gives shut up call to his european allies interesting read in real politick css aspirants must ',
  'negative')]

Positive/negative Split¶

In [12]:

pos = len([s for t, s in sent_tweets if s == 'positive'])
neg = len(sent_tweets) - pos
pos, neg

Out[12]:

(31, 69)

In [14]:

# Visualizing the results
import matplotlib.pyplot as plt
import numpy as np
plt.bar(['Positive','Negative'], [pos, neg], alpha = 0.5)
#plt.xticks(y_pos,objects)
plt.ylabel('Number')
plt.title('Number of Postive and NEgative Tweets')


plt.show()

In [ ]:

In [15]:

sample = ["You are a nice person man, have a good life"]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

Out[15]:

array([1])

In [19]:

from textblob import TextBlob

In [20]:

sample = []
yorum = "Kampanya berbat. kötü ya!"
blob = TextBlob(yorum)
sample.append(str(blob.translate(to="en")))

In [21]:

print(sample)

['The campaign sucks. bad either!']

In [22]:

sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

Out[22]:

array([0])

In [ ]: