# Data-handling libraries.
import pandas as pd
import numpy as np
# Format floats with two decimals ('%.2f') when pandas displays them.
pd.set_option('display.float_format', lambda x: '%.2f' % x)
# Plotting libraries; gridspec is used for the irregular subplot grid.
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
# Saved figures are written at 150 dpi.
plt.rcParams['savefig.dpi'] = 150
plt.style.use('ggplot')
import seaborn as sns
# Seaborn settings are applied after the matplotlib style above, in this order:
# general defaults, an 8-colour "husl" palette, an 8x5 default figure size
# and finally the 'whitegrid' style.
sns.set(color_codes=True)
sns.set_palette(sns.color_palette("husl", 8));
sns.mpl.rc("figure", figsize=(8,5))
sns.set_style('whitegrid')
cd ../
C:\Users\Talha\Documents\WinPython3\projects\News-Sharing-by-Sentiment
# Load both datasets -> pn: published news, tn: tweeted news
pn = pd.read_csv('data/pb-sp.csv', encoding='utf-8')
tn = pd.read_csv('data/tw-sp.csv', encoding='utf-8')

# Outlet names present in the published-news data, in sorted order.
outlets = pn['outlet'].unique().tolist()
outlets.sort()

# sp = posemo - negemo for both frames; tn additionally gets sp_t, the same
# difference computed from its posemo_t / negemo_t columns.
for frame in (pn, tn):
    frame['sp'] = frame['posemo'] - frame['negemo']
tn['sp_t'] = tn['posemo_t'] - tn['negemo_t']
# Sports news: correlation between retweet count and the posemo / negemo scores
is_sports = tn['cat'] == 'sports'
tn.loc[is_sports, ['rt', 'posemo', 'negemo']].corr()
rt | posemo | negemo | |
---|---|---|---|
rt | 1.00 | 0.14 | -0.04 |
posemo | 0.14 | 1.00 | -0.09 |
negemo | -0.04 | -0.09 | 1.00 |
# Politics and Middle East news: correlation between retweet count and the posemo / negemo scores
is_political = tn['cat'].isin(['politics', 'middleeast'])
tn.loc[is_political, ['rt', 'posemo', 'negemo']].corr()
rt | posemo | negemo | |
---|---|---|---|
rt | 1.00 | -0.05 | 0.05 |
posemo | -0.05 | 1.00 | 0.03 |
negemo | 0.05 | 0.03 | 1.00 |
# News tweeted multiple times: each story's tweets are split into two groups,
# the least retweeted tweet versus all the other tweets of that story.
df4 = tn.groupby('href').filter(lambda g: len(g) > 3)  # news tweeted at least four times

# DataFrame.sort() has been removed from pandas; sort_values() is the replacement.
# Only the two columns needed are handed to apply().
by_story = df4.groupby('href')[['rt', 'sp_t']]
more_rt = by_story.apply(lambda g: g.sort_values('rt').iloc[1:])  # all tweets except the least retweeted one
less_rt = by_story.apply(lambda g: g.sort_values('rt').iloc[0])  # least retweeted tweet of each story

# Mean tweet-text polarity of the two groups.
print(more_rt.sp_t.mean(), less_rt.sp_t.mean())
-0.335782241015 -0.08625
df = tn.groupby('href').filter(lambda g: len(g) == 3)  # news tweeted exactly three times

# Within each story, order the tweets by retweet count and renumber them from 0
# (0 = least retweeted). sort_values() replaces the removed DataFrame.sort().
grouped = df.groupby('href')[['rt', 'sp_t', 'cat']].apply(
    lambda g: g.sort_values('rt').reset_index(drop=True))

# One row per story; columns are (variable, retweet rank) pairs.
unstacked = grouped.unstack()

# Mean tweet-text polarity for each retweet rank.
ax = unstacked.sp_t.mean()[:4].plot(kind='bar')
ax.set(xlabel='Retweet order (0 is the least retweeted group)',
       ylabel='Mean Tweet Sentiment',
       title='Tweet Sentiment vs RT Groups');
#Sample size for each group
#pd.DataFrame([unstacked.sp_t[[i]].dropna().shape[0] for i in range(unstacked.sp_t.columns.shape[0])],columns=['N'])
unstacked
rt | sp_t | cat | |||||||
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | 0 | 1 | 2 | 0 | 1 | 2 | |
href | |||||||||
http://abcnews.go.com/Business/cup-inventor-john-sylvan-admits-expensive-coffee-pods/story?id=29382481 | 94.00 | 189.00 | 326.00 | 4.35 | -3.84 | -3.84 | business | business | business |
http://abcnews.go.com/Business/embattled-sony-pictures-executive-amy-pascal-steps/story?id=28749965 | 33.00 | 57.00 | 74.00 | 0.00 | 0.00 | 0.00 | business | business | business |
http://abcnews.go.com/Business/kraft-krft-heinz-agree-merge/story?id=29889951 | 123.00 | 166.00 | 208.00 | 4.55 | 8.00 | 7.69 | business | business | business |
http://abcnews.go.com/Business/make-104-acre-florida-island-dream-home/story?id=29732824 | 168.00 | 181.00 | 232.00 | 0.00 | 0.00 | 0.00 | business | business | business |
http://abcnews.go.com/Entertainment/bobbi-kristina-brown-alive-found-unresponsive/story?id=28628653 | 294.00 | 316.00 | 714.00 | 0.00 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/duchess-kate-appeals-greater-care-childrens-mental-health/story?id=28988190 | 210.00 | 351.00 | 369.00 | 0.00 | 0.00 | 3.85 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/harrison-ford-injured-california-small-plane-crash/story?id=29425681 | 186.00 | 478.00 | 746.00 | 0.00 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/jon-stewart-leaving-comedy-central/story?id=28875084 | 118.00 | 179.00 | 189.00 | 5.00 | 0.00 | 3.45 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/kevin-costner-god-miracle-bobbi-kristina-brown/story?id=28867469 | 133.00 | 155.00 | 217.00 | 7.69 | 7.69 | 7.69 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/meet-elena-avalor-disneys-latina-princess/story?id=28581447 | 292.00 | 810.00 | 1300.00 | 0.00 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/new-york-times-media-columnist-david-carr-dies-58/story?id=28936813 | 92.00 | 96.00 | 103.00 | 0.00 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/oscars-2015-live-updates-red-carpet/story?id=29075436 | 144.00 | 159.00 | 178.00 | 0.00 | 4.55 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/star-trek-star-leonard-nimoy-dies-83/story?id=29274628 | 732.00 | 835.00 | 2022.00 | 4.17 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Entertainment/vanilla-ice-arrested-burglary-florida-police/story?id=29058510 | 71.00 | 146.00 | 156.00 | 0.00 | 0.00 | 0.00 | entertainment | entertainment | entertainment |
http://abcnews.go.com/Health/autism-speaks-urges-parents-vaccinate-children/story?id=28751485 | 428.00 | 544.00 | 565.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/blind-golden-retriever-smiley-warms-hearts-therapy-dog/story?id=29533746 | 311.00 | 441.00 | 653.00 | 7.69 | 14.29 | 7.69 | health | health | health |
http://abcnews.go.com/Health/breakdown-ingredients-childhood-vaccines/story?id=28859870 | 68.00 | 71.00 | 102.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/dads-heartfelt-plea-congress-year-leukemia-exposed-measles/story?id=28866376 | 189.00 | 207.00 | 331.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/dangerous-bacteria-mysteriously-escapes-louisiana-monkey-lab/story?id=29327907 | 98.00 | 100.00 | 129.00 | -6.67 | -7.14 | -7.14 | health | health | health |
http://abcnews.go.com/Health/doctors-crawling-finish-line-great-idea/story?id=28998255 | 342.00 | 468.00 | 470.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/father-psych-ward-stabbing-victim-mental-patients-treated/story?id=28559283 | 38.00 | 42.00 | 56.00 | -5.88 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/florida-woman-birth-141-pound-baby/story?id=28784382 | 195.00 | 196.00 | 241.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/renowned-neurologist-oliver-sacks-announces-terminal-cancer/story?id=29084210 | 77.00 | 134.00 | 140.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/sex-couples-day-biological-children-researchers/story?id=29220568 | 168.00 | 169.00 | 224.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/Health/year-girl-dies-catching-flu-vaccine/story?id=28526729 | 169.00 | 221.00 | 266.00 | 0.00 | 0.00 | 0.00 | health | health | health |
http://abcnews.go.com/International/International/tiny-penguins-tiny-sweaters/story?id=28886035 | 1384.00 | 2712.00 | 3784.00 | 0.00 | 0.00 | 0.00 | world | world | world |
http://abcnews.go.com/International/air-canada-hard-landing-passengers-lucky-officials/story?id=29984179 | 58.00 | 59.00 | 66.00 | 12.50 | -6.67 | 0.00 | world | world | world |
http://abcnews.go.com/International/american-hostages-mother-us-failed-children/story?id=28803264 | 94.00 | 109.00 | 565.00 | 4.76 | 4.76 | -4.00 | world | world | world |
http://abcnews.go.com/International/american-soldier-christ-fighting-isis-iraq/story?id=29171878 | 51.00 | 57.00 | 263.00 | 0.00 | -3.57 | -3.57 | world | world | world |
http://abcnews.go.com/International/americans-germanwings-plane-official/story?id=29887148 | 17.00 | 32.00 | 89.00 | 0.00 | -7.14 | 0.00 | world | world | world |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/03/the-disturbing-case-of-the-bloggers-who-fake-death-and-disease-for-attention/ | 59.00 | 80.00 | 90.00 | -11.11 | -11.11 | -11.11 | news | news | news |
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/09/facebook-censored-a-nude-painting-and-it-could-change-the-site-forever/ | 68.00 | 117.00 | 133.00 | 0.00 | 0.00 | 0.00 | news | news | news |
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/13/what-was-fake-on-the-internet-this-week-putins-death-hillarys-horns-and-marijuana-at-kfc/ | 47.00 | 54.00 | 113.00 | -4.76 | -7.69 | -4.76 | news | news | news |
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/20/what-was-fake-on-the-internet-this-week-smartwatch-cancer-michael-browns-mom-and-the-true-story-of-unfriended/ | 26.00 | 56.00 | 69.00 | 0.00 | -7.69 | -7.69 | news | news | news |
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/23/what-you-dont-know-about-internet-algorithms-is-hurting-you-and-you-probably-dont-know-very-much/ | 148.00 | 181.00 | 183.00 | 4.17 | 4.35 | 4.35 | news | news | news |
http://www.washingtonpost.com/news/the-intersect/wp/2015/04/01/what-is-fake-on-the-internet-today-a-comprehensive-updating-list-of-april-fools-pranks-and-hoaxes/ | 77.00 | 87.00 | 126.00 | -5.88 | -4.17 | -4.55 | news | news | news |
http://www.washingtonpost.com/news/to-your-health/wp/2015/03/09/how-parents-create-narcissistic-children/ | 114.00 | 137.00 | 146.00 | 10.00 | 10.00 | 10.00 | news | news | news |
http://www.washingtonpost.com/opinions/2015/03/27/87655262-d3f4-11e4-a62f-ee745911a4ff_story.html | 47.00 | 54.00 | 76.00 | -5.00 | -5.00 | -5.00 | opinions | opinions | opinions |
http://www.washingtonpost.com/opinions/for-richer-or-poorer-the-challenges-of-marrying-outside-your-class/2015/03/26/cd7ccf72-ccac-11e4-8a46-b1dc9be5a8ff_story.html | 50.00 | 83.00 | 204.00 | 6.25 | 6.25 | 4.54 | opinions | opinions | opinions |
http://www.washingtonpost.com/opinions/pro-discrimination-religious-freedom-laws-are-dangerous-to-america/2015/03/29/bdb4ce9e-d66d-11e4-ba28-f2a685dc7f89_story.html | 217.00 | 390.00 | 435.00 | 0.00 | 0.00 | 0.00 | opinions | opinions | opinions |
http://www.washingtonpost.com/opinions/who-had-the-worst-week-in-washington-rep-aaron-schock/2015/03/20/66809852-ce6f-11e4-a2a7-9517a3a70506_story.html | 21.00 | 34.00 | 35.00 | -6.25 | -6.67 | -6.67 | opinions | opinions | opinions |
http://www.washingtonpost.com/politics/absence-of-2016-competition-for-clinton-raises-stakes-for-democrats/2015/03/11/60fc4ca8-c81d-11e4-a199-6cb5e63819d2_story.html | 26.00 | 36.00 | 43.00 | 5.00 | 5.88 | 5.26 | politics | politics | politics |
http://www.washingtonpost.com/politics/hillary-clinton-to-answer-questions-about-use-of-private-e-mail-server/2015/03/10/4c000d00-c735-11e4-a199-6cb5e63819d2_story.html | 33.00 | 43.00 | 131.00 | 0.00 | 0.00 | 4.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/how-the-white-house-decides-whose-death-is-worth-presidential-notice/2015/03/12/0c43083a-c83d-11e4-a199-6cb5e63819d2_story.html | 47.00 | 51.00 | 65.00 | 0.00 | 0.00 | 0.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/mitt-romney-warms-to-marco-rubio-as-young-senator-cultivates-relationship/2015/03/13/21a769b8-c98d-11e4-a199-6cb5e63819d2_story.html | 26.00 | 49.00 | 60.00 | 5.00 | 0.00 | 0.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/police-suspect-arrested-in-shooting-of-two-officers-in-ferguson/2015/03/15/eb3140c2-cb38-11e4-8a46-b1dc9be5a8ff_story.html | 37.00 | 41.00 | 197.00 | 0.00 | 0.00 | -4.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/secret-service-agents-disrupted-bomb-investigation-at-white-house/2015/03/12/0eb74590-c8c4-11e4-aa1a-86135599fb0f_story.html | 69.00 | 88.00 | 157.00 | 0.00 | 4.76 | 0.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/secret-service-agents-investigated-for-late-night-car-accident-at-white-house/2015/03/11/9c853906-c7ff-11e4-a199-6cb5e63819d2_story.html | 58.00 | 66.00 | 85.00 | 0.00 | 0.00 | 0.00 | politics | politics | politics |
http://www.washingtonpost.com/politics/state-department-reviewing-whether-clinton-e-mail-violated-security-rules/2015/03/05/16d1547e-c378-11e4-9271-610273846239_story.html | 43.00 | 43.00 | 65.00 | 0.00 | 0.00 | 0.00 | politics | politics | politics |
http://www.washingtonpost.com/posteverything/wp/2015/03/14/this-is-why-its-impossible-for-the-kremlin-to-lie-about-putins-weird-disappearance/ | 125.00 | 126.00 | 130.00 | -5.26 | -5.26 | -5.26 | posteverything | posteverything | posteverything |
http://www.washingtonpost.com/posteverything/wp/2015/03/30/youre-not-fooling-everyone-with-your-pretend-laughter/ | 24.00 | 26.00 | 43.00 | 0.00 | 0.00 | 0.00 | posteverything | posteverything | posteverything |
http://www.washingtonpost.com/world/africa/deep-in-the-rain-forest-hunting-for-the-next-ebola-outbreak/2015/03/19/c1cba80e-b78c-11e4-bc30-a4e75503948a_story.html | 57.00 | 63.00 | 108.00 | 4.35 | 0.00 | 4.35 | world | world | world |
http://www.washingtonpost.com/world/after-12-years-in-guantanamo-ex-detainees-find-little-solace-in-uruguay/2015/03/21/4d376006-c1e5-11e4-a188-8e4971d37a8d_story.html | 43.00 | 68.00 | 108.00 | 5.00 | 0.00 | 0.00 | world | world | world |
http://www.washingtonpost.com/world/asia_pacific/north-koreas-growing-economy-and-americas-misconceptions-about-it/2015/03/13/b551d2d0-c1a8-11e4-a188-8e4971d37a8d_story.html | 55.00 | 78.00 | 87.00 | 0.00 | 0.00 | 0.00 | world | world | world |
http://www.washingtonpost.com/world/europe/come-to-rome-for-the-cathedrals-the-ruins--and-the-red-light-district/2015/03/09/880d0440-bd37-11e4-9dfb-03366e719af8_story.html | 37.00 | 38.00 | 52.00 | 4.76 | -5.56 | 4.76 | world | world | world |
http://www.washingtonpost.com/world/europe/report-co-pilot-on-doomed-flight-had-psychological-treatments-in-past/2015/03/27/b1818c48-d40b-11e4-8b1e-274d670aa9c9_story.html | 51.00 | 67.00 | 76.00 | -4.00 | 0.00 | -4.35 | world | world | world |
http://www.washingtonpost.com/world/middle_east/the-islamic-state-is-fraying-from-within/2015/03/08/0003a2e0-c276-11e4-a188-8e4971d37a8d_story.html | 24.00 | 54.00 | 131.00 | -11.76 | 0.00 | 0.00 | world | world | world |
http://www.washingtonpost.com/world/negotiators-hold-marathon-all-night-session-in-last-ditch-effort-for-agreement/2015/04/02/68334c88-d8b2-11e4-bf0b-f648b95a6488_story.html | 51.00 | 68.00 | 102.00 | 6.25 | 0.00 | 5.00 | world | world | world |
http://www.washingtonpost.com/world/pilot-reportedly-locked-out-of-cockpit-before-plane-crashed-into-alpine-mountainside/2015/03/26/460770d8-d38c-11e4-a62f-ee745911a4ff_story.html | 78.00 | 104.00 | 255.00 | 0.00 | -5.56 | -4.35 | world | world | world |
http://www.washingtonpost.com/world/plane-carrying-150-crashes-in-france-apparently-no-survivors/2015/03/24/6fe0fc70-d225-11e4-a62f-ee745911a4ff_story.html | 103.00 | 105.00 | 253.00 | 0.00 | -4.35 | 0.00 | world | world | world |
529 rows × 9 columns
# Strip plots of retweet counts, one panel per outlet, for news tweeted at least four times.
df4 = tn.groupby('href').filter(lambda g: len(g)>3)

# Eight panels on a 3x3 grid; the seventh panel spans the first two columns of the last row.
gs = gridspec.GridSpec(3, 3)
cells = (gs[0,0], gs[0,1], gs[0,2], gs[1,0], gs[1,1], gs[1,2], gs[2,:2], gs[2,2])
axs = [plt.subplot(s) for s in cells]

# Outlets that get a fixed y-axis range before plotting.
y_ranges = {'CNN': [0,3000], 'ABC': [0,3000], 'NYT': [0,2000], 'WPOST': [0,1000]}

for i, o in enumerate(df4.outlet.unique()):
    panel = axs[i]
    panel.set_title(o)
    if o in y_ranges:
        panel.set_ylim(y_ranges[o])
    g = sns.stripplot(x="href", y="rt", data=df4[df4.outlet==o], ax=panel, jitter=True, size=4)
    # Only panels with i % 3 == 0 get a y label; only panels with i >= 6 get an x label.
    ylabel = '' if i % 3 else 'Retweet'
    xlabel = '' if i < 6 else 'Tweeted News'
    panel.set(xlabel=xlabel, ylabel=ylabel, ylim=0, xticks=[])

f = plt.gcf()
f.set_size_inches(10, 7, forward=True)
f.suptitle('Retweet Counts of The News Tweeted at Least Four Times', fontsize=18, fontweight='bold')
f.savefig('figs/rt-news-stripplot.png', bbox_inches='tight')
# Retweet statistics: tweets without a URL, tweets with any URL, and the tweeted news (tn)
tw = pd.read_csv('data/LIWC/LIWC2015 Results (tweet-texts).csv', encoding='utf-8')
has_url = tw.url.notnull()
nan = tw[~has_url]  # rows whose url field is missing
url = tw[has_url]   # rows with a url
pd.DataFrame({'no url': nan.rt.describe(),
              'any url': url.rt.describe(),
              'news url': tn.rt.describe()})
any url | news url | no url | |
---|---|---|---|
count | 23255.00 | 16909.00 | 2344.00 |
mean | 135.47 | 134.34 | 206.62 |
std | 355.89 | 259.36 | 759.46 |
min | 0.00 | 1.00 | 0.00 |
25% | 43.00 | 45.00 | 44.00 |
50% | 72.00 | 74.00 | 94.00 |
75% | 133.00 | 134.00 | 207.00 |
max | 36985.00 | 11031.00 | 31123.00 |
# Retweeted News: every row of tn is repeated 1 + rt // 50 times, i.e. once
# plus one extra copy per 50 retweets (rt is assumed to be non-negative, for
# which this equals the original int(1 + rt / 50)).
# The rows are repeated by position in one vectorized step. This replaces the
# original construction of one small DataFrame per tweet via iterrows(), which
# took about 3.5 minutes, and it keeps the column dtypes of tn.
copies = (1 + tn['rt'] // 50).astype(int)
rn = tn.iloc[np.repeat(np.arange(len(tn)), copies.values)]

#sentiment polarity stats
pd.DataFrame({'Published': pn.sp.describe(),
              'Tweeted': tn.sp.describe(),
              'Retweeted': rn.sp.describe()})
Published | Retweeted | Tweeted | |
---|---|---|---|
count | 35930.000000 | 53792.000000 | 16909.000000 |
mean | 0.514790 | 0.201119 | 0.124686 |
std | 1.793846 | 1.977284 | 1.914987 |
min | -10.730000 | -8.650000 | -8.650000 |
25% | -0.510000 | -0.990000 | -1.050000 |
50% | 0.550000 | 0.180000 | 0.090000 |
75% | 1.630000 | 1.360000 | 1.260000 |
max | 11.320000 | 14.700000 | 14.700000 |
# we can filter the categories by increasing the number of news required per category
filtr = 100
# Category labels that are left out of the comparison.
excluded = {'news', 'article', 'storyline', 'bigstory'}
df = tn.groupby('cat').filter(lambda x: len(x) > filtr and x.name not in excluded)

# Per-category averages of retweets and sentiment polarity (pivot_table's default aggregation).
df = pd.pivot_table(df, values=['sp', 'rt'], index=['cat'])

# x / y / data are passed by keyword: current seaborn no longer accepts them positionally.
ax = sns.regplot(x='rt', y='sp', data=df)
ax.set(xlabel='Retweet', ylabel='Sentiment Polarity', title='SP vs RT of Categories', ylim=(-1, 2))

# Label every point with its category; coordinates are read by column name so
# they cannot silently swap if the column order changes.
for cat_name, row in df.iterrows():
    ax.annotate(cat_name, (row['rt'], row['sp']), xytext=(5, -2), textcoords='offset points')
plt.gcf().savefig('figs/cat-rt-sp-scatter.png')
# Mean, median and max of retweets per outlet
retweet_aggregates = [np.mean, np.median, max]
tn.pivot_table(index=['outlet'], values=['rt'], aggfunc=retweet_aggregates).rename(columns={'rt': 'Retweet'})
mean | median | max | |
---|---|---|---|
Retweet | Retweet | Retweet | |
outlet | |||
ABC | 184.100213 | 110.5 | 6994 |
AP | 89.745522 | 70.0 | 862 |
CBSNews | 72.585079 | 48.0 | 2458 |
CNN | 396.500725 | 248.0 | 7752 |
FoxNews | 134.664111 | 89.0 | 3122 |
NBCNews | 85.786790 | 55.0 | 11031 |
NYT | 139.529051 | 84.0 | 8917 |
WPOST | 83.048544 | 59.0 | 3683 |
# Density plot of retweets: one curve per outlet
for outlet_name in outlets:
    ax = sns.kdeplot(tn[tn.outlet == outlet_name].rt, label=outlet_name)
ax.set(xlim=(0,500), title='Retweet Distributions', xlabel='Retweet Count', ylabel='Density');
# Sentiment polarities [posemo - negemo] of news: published vs. tweeted vs. retweeted
c = 'sp'
for frame, legend_label in ((pn, 'Published News'), (tn, 'Tweeted News'), (rn, 'Retweeted News')):
    ax = sns.kdeplot(frame[c], label=legend_label)
ax.set_title('Comparing Sentiment Polarity of News Published/Tweeted/Retweeted - All Outlets Combined')
ax.set(xlim=(-10,10), xlabel='Sentiment Polarity', ylabel='Density')
plt.savefig('figs/sentiment-comparison-published-tweeted-retweeted.png', bbox_inches='tight')
# Density of the 'Tone' column: published vs. tweeted vs. retweeted news
for frame, legend_label in ((pn, 'Published News'), (tn, 'Tweeted News'), (rn, 'Retweeted News')):
    ax = sns.kdeplot(frame['Tone'], label=legend_label)
ax.set_title('Comparing Tone of News Published/Tweeted/Retweeted - All Outlets Combined')
ax.set_xlabel('Tone')
ax.set_ylabel('Density')
plt.savefig('figs/tone-comparison-published-tweeted-retweeted.png', bbox_inches='tight')
# Published news: one sentiment-polarity density curve per outlet on shared axes
df = pn
for outlet_name in outlets:
    ax = sns.kdeplot(df.loc[df['outlet'] == outlet_name, 'sp'], label=outlet_name)
ax.set(xlim=(-10, 10),
       title='Sentiment Polarities of Published News',
       xlabel='Sentiment Polarity',
       ylabel='Density')
plt.savefig('figs/sentiment-published.png', bbox_inches='tight')
# Tweeted news: one sentiment-polarity density curve per outlet on shared axes
df = tn
for outlet_name in outlets:
    ax = sns.kdeplot(df.loc[df['outlet'] == outlet_name, 'sp'], label=outlet_name)
ax.set(xlim=(-10, 10),
       title='Sentiment Polarities of Tweeted News',
       xlabel='Sentiment Polarity',
       ylabel='Density')
plt.savefig('figs/sentiment-tweeted.png', bbox_inches='tight')
# Retweeted news: one sentiment-polarity density curve per outlet on shared axes
df = rn
for outlet_name in outlets:
    ax = sns.kdeplot(df.loc[df['outlet'] == outlet_name, 'sp'], label=outlet_name)
ax.set(xlim=(-10, 10),
       title='Sentiment Polarities of Retweeted News',
       xlabel='Sentiment Polarity',
       ylabel='Density')
plt.savefig('figs/sentiment-retweeted.png', bbox_inches='tight')
# Each outlet in a separate plot: published vs. tweeted vs. retweeted sentiment polarity
f, axes = plt.subplots(nrows=4, ncols=2, figsize=(16,16), subplot_kw={'xlim': (-10,10)})
plt.subplots_adjust(hspace=0.3, wspace=0.1)
for i, o in enumerate(outlets):
    # Floor division is required: i / 2 is a float on Python 3 and cannot be
    # used to index the array of axes.
    ax = axes[i // 2][i % 2]
    ax.set_title(o, fontsize=14)
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    sns.kdeplot(pn[pn['outlet']==o]['sp'], label='all news', ax=ax)
    sns.kdeplot(tn[tn['outlet']==o]['sp'], label='tweeted', ax=ax)
    sns.kdeplot(rn[rn['outlet']==o]['sp'], label='retweeted', ax=ax)
f.suptitle('Sentiment Scores per Outlet', fontsize=20, y=0.93)
plt.savefig('figs/sentiment-per-outlet.png', bbox_inches='tight')
# Mean sentiment polarity per outlet for published, tweeted and retweeted news.
# The tweeted and retweeted columns are joined onto the outlets of the published table.
labelled_frames = (('Published', pn), ('Tweeted', tn), ('Retweeted', rn))
sp_means = [
    pd.pivot_table(frame, index=['outlet'], values=['sp'], aggfunc=np.mean).rename(columns={'sp': label})
    for label, frame in labelled_frames
]
df = sp_means[0].join(sp_means[1]).join(sp_means[2])
df
Published | Tweeted | Retweeted | |
---|---|---|---|
outlet | |||
ABC | 0.997326 | 0.347335 | 0.568554 |
AP | 0.409545 | -0.178651 | -0.265285 |
CBSNews | 0.247449 | -0.109504 | -0.132829 |
CNN | 0.321695 | 0.224725 | 0.173701 |
FoxNews | 0.712249 | -0.007499 | -0.004798 |
NBCNews | 0.860164 | -0.264770 | -0.093226 |
NYT | 0.756503 | 0.357766 | 0.430961 |
WPOST | 0.653282 | 0.570105 | 0.542642 |
# Plot the columns of df against its index and save the figure.
ax = df.plot()
ax.set(title='Sentiment Polarities of News Averaged per Outlet',
       ylabel='Sentiment Polarity',
       xlabel='Outlets')
plt.savefig('figs/sentiment-averages-per-outlet.png', bbox_inches='tight')
# Sentiment-polarity densities of selected categories, tweeted vs. retweeted.
cs = ['politics', 'sports']  #,'world'
palette = sns.color_palette()

# Tweeted curves first (default colours), then the retweeted curves as dashed
# lines coloured by the category's position in the current palette.
for category in cs:
    ax = sns.kdeplot(tn[tn['cat'] == category]['sp'], label=category + ' tweeted')
for i, category in enumerate(cs):
    sns.kdeplot(rn[rn['cat'] == category]['sp'], label=category + ' retweeted',
                linestyle='--', color=palette[i])

# Title typo fixed ("Distrubition" -> "Distribution").
ax.set(xlim=(-7,7), title='Sentiment Distribution of Politics and Sports News',
       xlabel='Sentiment Polarity', ylabel='Density')
fname = '-'.join(cs)
plt.savefig('figs/tweeted-' + fname + '.png', bbox_inches='tight')
# AP and CBSNews are dropped from the outlet list for the per-category grids.
# The membership check lets this code be re-run without raising ValueError
# once they have already been removed.
for dropped in ('AP', 'CBSNews'):
    if dropped in outlets:
        outlets.remove(dropped)

splim = (-10, 10)
sns.mpl.rc("figure", figsize=(10,4))
c = 'politics'
f, axes = plt.subplots(nrows=3, ncols=2, figsize=(16,12), subplot_kw={'xlim': splim})
plt.subplots_adjust(hspace=0.3, wspace=0.1)
for i, o in enumerate(outlets):
    # Floor division is required: i / 2 is a float on Python 3 and cannot be
    # used to index the array of axes.
    ax = axes[i // 2][i % 2]
    ax.set_title(o, fontsize=14)
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    # NOTE: tw and rt are rebound on every iteration; after this loop tw no
    # longer refers to the frame loaded from the LIWC CSV.
    tw = tn[(tn['cat']==c) & (tn['outlet']==o)]['sp']
    rt = rn[(rn['cat']==c) & (rn['outlet']==o)]['sp']
    sns.kdeplot(tw, label=c+' tweeted', ax=ax)
    sns.kdeplot(rt, label=c+' retweeted', ax=ax)
f.suptitle('Sentiment Scores for "Politics" per Outlet', fontsize=20, y=0.94)
plt.savefig('figs/politics-per-outlet.png', bbox_inches='tight')
# Sentiment polarity of sports news per outlet: tweeted vs. retweeted, on a 3x2 grid.
splim = (-10, 10)
sns.mpl.rc("figure", figsize=(10,4))
c = 'sports'
f, axes = plt.subplots(nrows=3, ncols=2, figsize=(16,12), subplot_kw={'xlim': splim})
plt.subplots_adjust(hspace=0.3, wspace=0.1)
for i, o in enumerate(outlets):
    # Floor division is required: i / 2 is a float on Python 3 and cannot be
    # used to index the array of axes.
    ax = axes[i // 2][i % 2]
    ax.set_title(o, fontsize=14)
    if o == 'CNN':
        # The CNN panel only shows a placeholder message; no curves are drawn.
        ax.text(0.5, 0.5, 'Category N/A',
                horizontalalignment='center',
                verticalalignment='center',
                fontsize=16, color='red',
                transform=ax.transAxes)
        continue
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    tw = tn[(tn['cat']==c) & (tn['outlet']==o)]['sp']
    rt = rn[(rn['cat']==c) & (rn['outlet']==o)]['sp']
    sns.kdeplot(tw, label=c+' tweeted', ax=ax)
    sns.kdeplot(rt, label=c+' retweeted', ax=ax)
f.suptitle('Sentiment Scores for "Sports" per Outlet', fontsize=20, y=0.94)
plt.savefig('figs/sports-per-outlet.png', bbox_inches='tight')
# Mean sentiment polarity of politics news per outlet: tweeted vs. retweeted
c = 'politics'
tweeted_mean = pd.pivot_table(tn[tn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
retweeted_mean = pd.pivot_table(rn[rn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
df = tweeted_mean.rename(columns={'sp': 'Tweeted'}).join(
    retweeted_mean.rename(columns={'sp': 'Retweeted'}))
df
Tweeted | Retweeted | |
---|---|---|
outlet | ||
ABC | 0.427611 | 0.289167 |
CNN | 0.399042 | 0.256381 |
FoxNews | 0.435667 | 0.366697 |
NBCNews | 0.863457 | 1.445029 |
NYT | 0.414118 | 0.316609 |
WPOST | 0.459786 | 0.351538 |
# Mean sentiment polarity of sports news per outlet: tweeted vs. retweeted
c = 'sports'
tweeted_mean = pd.pivot_table(tn[tn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
retweeted_mean = pd.pivot_table(rn[rn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
df = tweeted_mean.rename(columns={'sp': 'Tweeted'}).join(
    retweeted_mean.rename(columns={'sp': 'Retweeted'}))
df
Tweeted | Retweeted | |
---|---|---|
outlet | ||
ABC | 1.908525 | 2.193559 |
CNN | 2.652000 | 2.555312 |
FoxNews | 0.740492 | 1.086916 |
NBCNews | 0.516452 | 0.525119 |
NYT | 1.554690 | 1.531263 |
WPOST | 0.653333 | 0.846667 |
# Is there any correlation between retweet count (rt), news polarity (sp)
# and the polarity of the tweet text (sp_t)?
polarity_columns = ['rt', 'sp', 'sp_t']
tn[polarity_columns].corr()
rt | sp | sp_t | |
---|---|---|---|
rt | 1.000000 | 0.024777 | 0.017971 |
sp | 0.024777 | 1.000000 | 0.424538 |
sp_t | 0.017971 | 0.424538 | 1.000000 |