# Importing the libraries
import numpy as np
import re
import pickle
import nltk
from nltk.corpus import stopwords
from sklearn.datasets import load_files
# Download the NLTK stop-word corpus so that nltk.corpus.stopwords can be used.
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to [nltk_data] /Users/uzaycetin/nltk_data... [nltk_data] Package stopwords is already up-to-date!
True
# Using our classifier
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    NOTE: unpickling executes arbitrary code, so only load files that come
    from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Fitted TF-IDF vectoriser and the classifier that consumes its output.
tfidf = _load_pickle('pre-trained-model/tfidfmodel.pickle')
clf = _load_pickle('pre-trained-model/classifier.pickle')
# Vectorise a single friendly sentence and ask the classifier for its label.
sample = tfidf.transform(
    ["You are a nice person man, have a good life"]
).toarray()
sentiment = clf.predict(sample)
sentiment
array([1])
# Same check with a negated sentence ("not good").
sample = tfidf.transform(["Logistic regression is not good! "]).toarray()
sentiment = clf.predict(sample)
sentiment
array([1])
import json
import tweepy
import time
from tweepy import OAuthHandler
import os

# Twitter API credentials are read from the environment instead of being
# hard-coded, so that secrets never end up in the source file. A missing
# variable raises KeyError naming the variable that has to be set.
# NOTE(review): any key that was ever committed to source control should be
# revoked and regenerated.
consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

# OAuth 1a handshake: application keys first, then the user's access token.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Client that waits out rate-limit windows (printing a notice when it does)
# and retries failed requests up to 3 times, 60 seconds apart.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
    retry_count=3,
    retry_delay=60,
)
# api.me() returns the account whose authentication keys are in use; print
# its basic profile fields as a quick check that authentication worked.
user = api.me()
print(
    'Name: ' + user.name,
    'ID: ' + str(user.id),
    'Location: ' + user.location,
    sep='\n',
)
Name: Bijoyan Das ID: 624310916 Location: Calcutta, India
# Search term(s); exactly one term is expected.
args = ['Khashoggi']
# Client with a 10-second request timeout.
# NOTE: this client does not set wait_on_rate_limit.
api = tweepy.API(auth, timeout=10)

# Fetching the tweets
list_tweets = []
query = args[0]
if len(args) == 1:
    # tweet_mode='extended' makes the API return the complete tweet in
    # `full_text`; in the default mode long tweets come back in `text`
    # cut off with a trailing ellipsis, which would be fed to the classifier
    # incomplete.
    cursor = tweepy.Cursor(
        api.search,
        q=query + " -filter:retweets",  # original tweets only
        lang='en',
        result_type='recent',
        geocode="22.1568,89.4332,500km",
        tweet_mode='extended',
    )
    # Collect the text of at most 100 matching tweets.
    list_tweets.extend(status.full_text for status in cursor.items(100))
list_tweets[:4]
['Khashoggi killers ‘will be prosecuted in Saudi Arabia’: Saudi FM https://t.co/guT8LwGo3T', 'Mattis: Khashoggi murder undermines Middle East stability https://t.co/m8AilpBnr7', "Saudi Arabia says it is beacon of 'light' against Iran despite Khashoggi crisis https://t.co/TJD7WLOx4t", 'French President gives a shut up call to his European allies? Interesting read in Real Politick, CSS aspirants must… https://t.co/KZcU8eIx7f']
# Human-readable names for the classifier's numeric labels.
# The classifier emits 1 for positive text and 0 for negative text: in this
# file "You are a nice person man, have a good life" is predicted as 1 and
# "The campaign sucks. bad either!" as 0.
mapping = {0: 'negative', 1: 'positive'}
# Preprocessing the tweets

# t.co short links at the start, in the middle and at the end of a tweet.
# The dot is escaped so that only the literal host "t.co" matches.
_URL_PATTERNS = (
    re.compile(r"^https://t\.co/[a-zA-Z0-9]*\s"),
    re.compile(r"\s+https://t\.co/[a-zA-Z0-9]*\s"),
    re.compile(r"\s+https://t\.co/[a-zA-Z0-9]*$"),
)

# Contractions and their expansions, applied in this order to lower-cased
# text ("she's" must be handled before "he's", which it contains).
_CONTRACTIONS = (
    ("that's", "that is"),
    ("there's", "there is"),
    ("what's", "what is"),
    ("where's", "where is"),
    ("it's", "it is"),
    ("who's", "who is"),
    ("i'm", "i am"),
    ("she's", "she is"),
    ("he's", "he is"),
    ("they're", "they are"),
    ("who're", "who are"),
    ("ain't", "am not"),
    ("wouldn't", "would not"),
    ("shouldn't", "should not"),
    ("can't", "can not"),
    ("couldn't", "could not"),
    ("won't", "will not"),
)

# Noise-removal steps, applied in this order after contraction expansion.
# Every match is replaced by a single space.
_CLEANUP_PATTERNS = (
    re.compile(r"\W"),            # punctuation and other non-word characters
    re.compile(r"\d"),            # digits
    re.compile(r"\s+[a-z]\s+"),   # stray single letters inside the text
    re.compile(r"\s+[a-z]$"),     # ... at the end of the text
    re.compile(r"^[a-z]\s+"),     # ... at the start of the text
    re.compile(r"\s+"),           # collapse runs of whitespace
)


def _clean_tweet(tweet):
    """Return *tweet* lower-cased with links, contractions and noise removed.

    The steps run in a fixed order: strip t.co links, lower-case, expand
    contractions, then blank out non-word characters, digits and isolated
    single letters, and finally collapse whitespace.
    """
    for pattern in _URL_PATTERNS:
        tweet = pattern.sub(" ", tweet)
    tweet = tweet.lower()
    for contraction, expansion in _CONTRACTIONS:
        tweet = tweet.replace(contraction, expansion)
    for pattern in _CLEANUP_PATTERNS:
        tweet = pattern.sub(" ", tweet)
    return tweet


# (cleaned text, sentiment name) for every fetched tweet.
sent_tweets = []
for tweet in list_tweets:
    tweet = _clean_tweet(tweet)
    sent = clf.predict(tfidf.transform([tweet]).toarray())
    # predict() returns a length-1 array here; index it explicitly because
    # int() on an array with ndim > 0 is deprecated since NumPy 1.25.
    sent_tweets.append((tweet, mapping[int(sent[0])]))
sent_tweets[:4]
[('khashoggi killers will be prosecuted in saudi arabia saudi fm ', 'negative'), ('mattis khashoggi murder undermines middle east stability ', 'positive'), ('saudi arabia says it is beacon of light against iran despite khashoggi crisis ', 'negative'), ('french president gives shut up call to his european allies interesting read in real politick css aspirants must ', 'negative')]
# Tally the labels: every tweet that is not 'positive' is counted as negative.
pos = sum(1 for _, label in sent_tweets if label == 'positive')
neg = len(sent_tweets) - pos
pos, neg
(31, 69)
# Visualizing the results
import matplotlib.pyplot as plt
import numpy as np
# Bar chart comparing how many tweets fell into each sentiment class.
plt.bar(['Positive', 'Negative'], [pos, neg], alpha=0.5)
plt.ylabel('Number')
plt.title('Number of Positive and Negative Tweets')
plt.show()
# Re-run the friendly sentence through the vectoriser and classifier.
sample = tfidf.transform(
    ["You are a nice person man, have a good life"]
).toarray()
sentiment = clf.predict(sample)
sentiment
array([1])
from textblob import TextBlob
# A Turkish review; it is translated to English before classification
# because the vectoriser and classifier are applied to English text here.
yorum = "Kampanya berbat. kötü ya!"
blob = TextBlob(yorum)
sample = [str(blob.translate(to="en"))]
print(sample)
['The campaign sucks. bad either!']
# Classify the translated review held in `sample`.
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment
array([0])