There are several theories of emotion, all indicate that emotions are a precursor of behaviour.
Most used model for text analysis. Maps into 2D plane. Cannot map back into emotions.
Very precise, closest to biochemical definition of emotions. Not practical.
Widely used in face recognition programs. Widely accepted. Requires face-recognition software, thus very costly.
Based on etological observations. Superset of Ekman's model. http://saifmohammad.com/WebPages/NRC-Emotion-Lexicon.htm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import string
%matplotlib inline
emotions = ["anger",
"anticipation",
"disgust",
"fear",
"joy",
"negative",
"positive",
"sadness",
"surprise",
"trust"]
df = pd.read_csv("dict.csv")
df.head(10)
Word | anger | anticipation | disgust | fear | joy | negative | positive | sadness | surprise | trust | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | aback | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | abacus | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
2 | abandon | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
3 | abandoned | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
4 | abandonment | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 0 |
5 | abate | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | abatement | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
7 | abba | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
8 | abbot | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
9 | abbreviate | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
df[emotions].sum()
anger 1247 anticipation 839 disgust 1058 fear 1476 joy 689 negative 3324 positive 2312 sadness 1191 surprise 534 trust 1231 dtype: int64
print(len(df))
df[emotions].sum()/len(df)*100
14181
anger 8.793456 anticipation 5.916367 disgust 7.460687 fear 10.408293 joy 4.858614 negative 23.439814 positive 16.303505 sadness 8.398561 surprise 3.765602 trust 8.680629 dtype: float64
fig = plt.figure(figsize=(16, 10))
y_pos = np.arange(len(emotions))
percentage = df[emotions].sum()/len(df)*100
percentage = percentage.tolist()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
colors = ["#007C37","#79BF2A","#EBC527","#E66F11","#DB1245","#0C2C30","#F53022","#7D4CA1","#296CAB","#1781AA"]
bars = ax.bar(y_pos, percentage, align='center')
for bar,c in zip(bars,colors):
bar.set_facecolor(c)
plt.xticks(y_pos, emotions)
plt.ylabel('Percentage')
<matplotlib.text.Text at 0x105b2eb70>
Using tweepy for twitter acces, but can be done with any text. http://www.tweepy.org/
Tools used:
from tweepy.streaming import StreamListener
from tweepy import Stream
from tweepy import OAuthHandler
import json
from csv import DictReader
import seaborn as sns
Access twitter with OAuth. https://dev.twitter.com/oauth/overview/application-owner-access-tokens
import myKeys
api_key = myKeys.api_key
api_secret = myKeys.api_secret
access_token_key = myKeys.access_token_key
access_token_secret = myKeys.access_token_secret
Read the csv into a python dictionary
cols = ['anger', 'anticipation', 'disgust', 'fear',
'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust']
dictFile = 'dict.csv'
mainDict = {}
with open(dictFile) as csvFile:
reader = DictReader(csvFile)
for row in reader:
mainDict[row['Word']] = [int(row[i]) for i in cols]
The StreamListener class can be used to handle the incoming tweets. Here every tweet will be given an emotional score in the shape of a 10-valued vector.
class ColorListener(StreamListener):
def __init__(self):
self.tweets = pd.DataFrame(columns=('tweet', 'anger', 'anticipation',
'disgust', 'fear', 'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust'))
def on_data(self, data):
try:
tweet = json.loads(data)
vector = self.score(tweet)
print(vector)
row = pd.Series([tweet['text']]+vector, index=['tweet', 'anger', 'anticipation',
'disgust', 'fear', 'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust'])
self.tweets = self.tweets.append(row, ignore_index=True)
except UnboundLocalError:
raise UnboundLocalError # Tweet doesn't have text
except:
pass
return True
def score(self, tweet):
line = tweet['text'].replace('.','').replace(',','').replace(';','').replace(':','').replace('\t',' ').replace('\n',' ')
words = line.split(' ')
score = [0] * 10
for word in words:
if word in mainDict:
for i in range(len(score)):
score[i] += mainDict[word][i]
return score
def on_error(self, status):
print("Error: ", status)
Let's run it: We can pick other listening filters.
cListener = ColorListener()
auth = OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token_key, access_token_secret)
stream = Stream(auth, cListener)
# Start reading stream for english tweets with the color words
stream.filter(languages=['en'], track=['red', 'green','blue'])
# stream.filter(languages=['en'], track=['trump'])
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 0, 0, 1, 0, 1, 0, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 1, 1, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 1, 0, 0, 1, 0, 1, 0, 0, 1] [0, 1, 0, 1, 1, 1, 1, 1, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 1, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 1, 0, 1, 0, 0, 1] [0, 2, 0, 1, 1, 0, 1, 0, 1, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 1, 1, 0, 2, 0, 2, 0, 1, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 2, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 0, 1, 1, 0, 1, 0, 1, 0, 0] [0, 2, 0, 0, 4, 1, 4, 0, 2, 2] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 2, 1, 1, 1, 1, 2, 1, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 2, 0, 0, 4, 1, 4, 0, 2, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [2, 2, 1, 0, 0, 3, 0, 2, 0, 0] [0, 1, 0, 0, 1, 1, 1, 0, 1, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [2, 2, 1, 0, 0, 3, 0, 2, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 0, 0, 1, 0, 1, 0, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [2, 2, 0, 0, 1, 0, 2, 0, 1, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 1, 1, 1, 1, 0, 1, 2] [1, 1, 0, 0, 0, 0, 2, 0, 0, 2] [1, 0, 1, 1, 1, 1, 1, 1, 0, 1] [0, 1, 0, 0, 1, 0, 1, 0, 1, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 1, 0, 0, 1, 0, 1, 1, 0, 1] [0, 0, 0, 1, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 1, 0, 0, 0, 1, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 1, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 1, 0, 1, 1, 0, 0, 1] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 1, 1, 1, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 1, 0, 1, 1, 0, 0, 1] [0, 0, 1, 1, 0, 1, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 2, 0, 0, 2] [0, 2, 0, 0, 2, 0, 2, 0, 1, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 0, 1, 2, 0, 2, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 1, 0, 1, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 1, 0, 1, 0, 2, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 0, 1, 0, 0, 1] [0, 0, 0, 0, 1, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 0, 0, 1, 0, 0] [0, 1, 0, 0, 1, 1, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 2, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 0, 0, 0] [0, 1, 0, 0, 1, 0, 1, 1, 0, 1] [0, 1, 0, 0, 1, 0, 1, 1, 0, 1] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 1] [1, 0, 0, 2, 0, 1, 0, 0, 0, 0] [1, 2, 0, 1, 1, 0, 1, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 1, 2, 1, 1, 1] [0, 0, 0, 0, 1, 0, 1, 0, 0, 1] [0, 0, 0, 0, 1, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [1, 2, 0, 0, 1, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 0, 0] [0, 0, 1, 1, 0, 1, 0, 1, 0, 0] [0, 0, 1, 1, 0, 1, 1, 2, 0, 1] [2, 1, 0, 0, 1, 1, 1, 1, 1, 1] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 2, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 1, 0] [0, 0, 0, 1, 0, 1, 1, 1, 0, 0] [0, 0, 0, 0, 1, 0, 1, 1, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 2, 0, 0, 1, 0, 1, 0, 1, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 1, 1, 0, 1, 0, 2, 0, 1, 1] [0, 0, 0, 0, 1, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [1, 0, 0, 2, 0, 2, 1, 1, 1, 1] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 1, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 2, 1, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 0, 0, 1, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 1, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 2, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] [0, 0, 0, 1, 0, 1, 2, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 1, 0, 0, 1] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 1, 0, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 1, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] [2, 0, 2, 2, 0, 2, 0, 2, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 2, 0, 0, 4, 1, 4, 0, 2, 2] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 1, 0, 0, 1, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 1, 0, 0, 2, 0, 2, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 1, 0, 0, 1, 0, 1, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 1, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [1, 0, 0, 0, 0, 1, 0, 0, 0, 0] [0, 3, 0, 0, 1, 0, 1, 0, 1, 1] [0, 0, 0, 0, 1, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 1, 0, 2, 0, 0, 2] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [1, 0, 0, 1, 0, 1, 0, 0, 1, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 1, 0, 0, 0, 0, 1, 0, 1] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] [0, 1, 2, 0, 1, 1, 1, 0, 0, 1] [0, 1, 0, 0, 1, 1, 2, 1, 0, 1] [0, 0, 0, 0, 1, 0, 1, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 1, 0, 0, 1, 0, 1, 0, 1, 1] [1, 0, 1, 1, 0, 1, 0, 1, 0, 0] [0, 0, 0, 1, 0, 2, 0, 2, 0, 0] [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] [1, 1, 1, 0, 1, 2, 1, 1, 0, 0] [0, 0, 0, 1, 0, 1, 0, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 1, 1, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-10-7098589ea4ce> in <module>() 6 7 # Start reading stream for english tweets with the color words ----> 8 stream.filter(languages=['en'], track=['red', 'green','blue']) 9 # stream.filter(languages=['en'], track=['trump']) /usr/local/lib/python3.6/site-packages/tweepy/streaming.py in filter(self, follow, track, async, locations, stall_warnings, languages, encoding, filter_level) 443 self.session.params = {'delimited': 'length'} 444 self.host = 'stream.twitter.com' --> 445 self._start(async) 446 447 def sitestream(self, follow, stall_warnings=False, /usr/local/lib/python3.6/site-packages/tweepy/streaming.py in _start(self, async) 359 self._thread.start() 360 else: --> 361 self._run() 362 363 def on_closed(self, resp): /usr/local/lib/python3.6/site-packages/tweepy/streaming.py in _run(self) 261 self.snooze_time = self.snooze_time_step 262 self.listener.on_connect() --> 263 self._read_loop(resp) 264 except (Timeout, ssl.SSLError) as exc: 265 # This is still necessary, as a SSLError can actually be /usr/local/lib/python3.6/site-packages/tweepy/streaming.py in _read_loop(self, resp) 311 length = 0 312 while not resp.raw.closed: --> 313 line = buf.read_line().strip() 314 if not line: 315 self.listener.keep_alive() # keep-alive new lines are expected /usr/local/lib/python3.6/site-packages/tweepy/streaming.py in read_line(self, sep) 177 else: 178 start = len(self._buffer) --> 179 self._buffer += self._stream.read(self._chunk_size) 180 181 def _pop(self, length): /usr/local/lib/python3.6/site-packages/urllib3/response.py in read(self, amt, decode_content, cache_content) 382 else: 383 cache_content = False --> 384 data = self._fp.read(amt) 385 if amt != 0 and not data: # Platform-specific: Buggy versions of Python. 386 # Close the connection when no data is returned /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in read(self, amt) 447 # Amount is given, implement using readinto 448 b = bytearray(amt) --> 449 n = self.readinto(b) 450 return memoryview(b)[:n].tobytes() 451 else: /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in readinto(self, b) 481 482 if self.chunked: --> 483 return self._readinto_chunked(b) 484 485 if self.length is not None: /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _readinto_chunked(self, b) 576 try: 577 while True: --> 578 chunk_left = self._get_chunk_left() 579 if chunk_left is None: 580 return total_bytes /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _get_chunk_left(self) 544 self._safe_read(2) # toss the CRLF at the end of the chunk 545 try: --> 546 chunk_left = self._read_next_chunk_size() 547 except ValueError: 548 raise IncompleteRead(b'') /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _read_next_chunk_size(self) 504 def _read_next_chunk_size(self): 505 # Read the next chunk size from the file --> 506 line = self.fp.readline(_MAXLINE + 1) 507 if len(line) > _MAXLINE: 508 raise LineTooLong("chunk size") /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py in readinto(self, b) 584 while True: 585 try: --> 586 return self._sock.recv_into(b) 587 except timeout: 588 self._timeout_occurred = True /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags) 1000 "non-zero flags not allowed in calls to recv_into() on %s" % 1001 self.__class__) -> 1002 return self.read(nbytes, buffer) 1003 else: 1004 return socket.recv_into(self, buffer, nbytes, flags) /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py in read(self, len, buffer) 863 raise ValueError("Read on closed or unwrapped SSL socket.") 864 try: --> 865 return self._sslobj.read(len, buffer) 866 except SSLError as x: 867 if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs: /usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py in read(self, len, buffer) 623 """ 624 if buffer is not None: --> 625 v = self._sslobj.read(len, buffer) 626 else: 627 v = self._sslobj.read(len) KeyboardInterrupt:
df = cListener.tweets
print(len(df.index)) # Number of rows
339
df.head(10) # How the data looks like
tweet | anger | anticipation | disgust | fear | joy | negative | positive | sadness | surprise | trust | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | RT @RodriguezThaGod: No homo but this nigga no... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | @AshlynnMCosplay @BottsDave @OJessicaNigri @Do... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | RT @MrGoodBeard_: I say Aunt, Aunt is them red... | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
3 | RT @KicksDealsCA: Triple red Sock Darts can be... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | RT @bikeshareTO: 📣 Let's go Blue Jays! ⚾ We'll... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 | RT @tonyposnanski: @realDonaldTrump What about... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
6 | I entered a giveaway for a chance to win "Aibl... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
7 | 23:30: "Can't Stop" von Red Hot Chili Peppers | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
8 | ATTI: ML07Red Bus 293 on the Red Line is @ Uni... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
9 | Diesel Sunglasses Acetate Men Blue New https:... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
df.plot(figsize=(16, 6)) # Plot the sentiment as a time series
<matplotlib.axes._subplots.AxesSubplot at 0x10b905da0>
df['trust'].plot(figsize=(16, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x10be767f0>
df.plot(subplots=True, figsize=(16, 10))
array([<matplotlib.axes._subplots.AxesSubplot object at 0x10bf9f390>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c1f85c0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c25bef0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c2b4c18>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c31e668>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c31e6a0>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c3daa20>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c42ec18>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c49c390>, <matplotlib.axes._subplots.AxesSubplot object at 0x10c4f34a8>], dtype=object)
df['trust'].plot.kde(figsize=(16, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x10c75aac8>
df.sum()
tweet RT @RodriguezThaGod: No homo but this nigga no... anger 26 anticipation 66 disgust 28 fear 33 joy 88 negative 72 positive 150 sadness 77 surprise 33 trust 97 dtype: object
fig = plt.figure(figsize=(16, 10))
y_pos = np.arange(len(emotions))
percentage = df[emotions].sum()/len(df)*100
percentage = percentage.tolist()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
colors = ["#007C37","#79BF2A","#EBC527","#E66F11","#DB1245","#0C2C30","#F53022","#7D4CA1","#296CAB","#1781AA"]
bars = ax.bar(y_pos, percentage, align='center')
for bar,c in zip(bars,colors):
bar.set_facecolor(c)
plt.xticks(y_pos, emotions)
plt.ylabel('Percentage')
<matplotlib.text.Text at 0x10ca1e748>
cor = df.corr()
cor
anger | anticipation | disgust | fear | joy | negative | positive | sadness | surprise | trust | |
---|---|---|---|---|---|---|---|---|---|---|
anger | 1.000000 | 0.337338 | 0.404506 | 0.431530 | 0.063649 | 0.531125 | 0.068436 | 0.306094 | 0.214102 | 0.107636 |
anticipation | 0.337338 | 1.000000 | 0.111679 | 0.045942 | 0.659456 | 0.209921 | 0.571767 | 0.077925 | 0.542162 | 0.536310 |
disgust | 0.404506 | 0.111679 | 1.000000 | 0.366470 | 0.011724 | 0.559657 | 0.021466 | 0.274335 | -0.022263 | 0.192948 |
fear | 0.431530 | 0.045942 | 0.366470 | 1.000000 | -0.008045 | 0.531596 | 0.063771 | 0.298261 | 0.157383 | 0.086498 |
joy | 0.063649 | 0.659456 | 0.011724 | -0.008045 | 1.000000 | 0.062102 | 0.812033 | -0.082484 | 0.488003 | 0.700250 |
negative | 0.531125 | 0.209921 | 0.559657 | 0.531596 | 0.062102 | 1.000000 | 0.058457 | 0.528452 | 0.244778 | 0.122921 |
positive | 0.068436 | 0.571767 | 0.021466 | 0.063771 | 0.812033 | 0.058457 | 1.000000 | 0.007979 | 0.417559 | 0.702282 |
sadness | 0.306094 | 0.077925 | 0.274335 | 0.298261 | -0.082484 | 0.528452 | 0.007979 | 1.000000 | 0.029739 | -0.068251 |
surprise | 0.214102 | 0.542162 | -0.022263 | 0.157383 | 0.488003 | 0.244778 | 0.417559 | 0.029739 | 1.000000 | 0.360765 |
trust | 0.107636 | 0.536310 | 0.192948 | 0.086498 | 0.700250 | 0.122921 | 0.702282 | -0.068251 | 0.360765 | 1.000000 |
sns.heatmap(cor)
<matplotlib.axes._subplots.AxesSubplot at 0x10cc20f60>
sns.clustermap(cor)
/usr/local/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead. warnings.warn(message, mplDeprecation, stacklevel=1)
<seaborn.matrix.ClusterGrid at 0x10ca15cf8>
from matplotlib.pyplot import figure, show, rc
fig = figure(figsize=(10, 10))
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)
colors = ['#007C37','#79BF2A','#EBC527','#E66F11','#DB1245','#7D4CA1','#296CAB','#1781AA']
N = 8
theta = np.arange(0, 2*np.pi, 2*np.pi/N)-(np.pi/(2*N))
radii = df[['fear','trust','joy','anticipation','anger','disgust','sadness','surprise']].sum()
width = np.pi/N
bars = ax.bar(theta, radii, width=width, bottom=0.0)
for r,bar,c in zip(radii,bars,colors):
bar.set_facecolor(c)
bar.set_alpha(1)
show()
Generalization: Dictionary based lexical analysis.
Correlation with other variables.