import pandas as pd
import numpy as np
import folium
from IPython.display import HTML
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Load the election results and the two Twitter follower tables.
df = pd.read_csv('../data/governors-challengers.csv')

# Only the handle and follower-count columns are kept from each Twitter file.
# They are renamed on load (twch/folch for challengers, twgov/folgov for
# governors) so the two tables stay distinguishable once merged into df.
ch = pd.read_table(
    '../data/tw_ch.csv',
    usecols=['screen_name', 'followers_count'],
    encoding='utf-16',
).rename(columns={'screen_name': 'twch', 'followers_count': 'folch'})
gov = pd.read_table(
    '../data/tw_gov.csv',
    usecols=['screen_name', 'followers'],
).rename(columns={'screen_name': 'twgov', 'followers': 'folgov'})
# Choropleth of the incumbent governors' vote share (%) as stored in df at
# this point, joined to the GeoJSON features through their `name` property.
mapname = 'gov_share'
state_geo = 'us_states.geojson'
states = folium.Map(location=[40, -99], zoom_start=4)
states.geo_json(
    geo_path=state_geo,
    data=df,
    data_out=mapname + '.json',
    columns=['state', 'shareGov'],
    threshold_scale=[45, 50, 55, 60, 65, 70],
    key_on='feature.properties.name',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Incumbent Governor Vote Share (%)',
)
states.create_map(path=mapname + '.html')
# The src attribute is quoted so the markup stays well-formed for any file name.
HTML('<iframe src="' + mapname + '.html" style="width: 100%; height: 500px; border: none"></iframe>')
# Attach the follower counts to the election table. The join keys are spelled
# out (governor handle, then challenger handle) rather than left to whichever
# column names the frames happen to share.
df = df.merge(gov, on='twgov')
df = df.merge(ch, on='twch')
# Governor's share (%) of the combined governor + primary-challenger followers.
df['twshare'] = 100 * df['folgov'] / (df['folgov'] + df['folch'])
# Re-express the governor's vote share as a share (%) of the governor +
# challenger vote only.
# NOTE: shareGov is overwritten in place while shareCh is left as loaded, so
# executing this statement a second time would rescale an already rescaled
# value.
df['shareGov'] = 100 * df['shareGov'] / (df['shareGov'] + df['shareCh'])
# Absolute gap between Twitter share and vote share, in percentage points.
df['shareDiff'] = (df['twshare'] - df['shareGov']).abs()
df
state | party | since | twgov | twch | shareGov | shareCh | voterCnt | population | folgov | folch | twshare | shareDiff | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Alabama | R | 2011 | GovernorBentley | Griffith2014 | 63.600000 | 36.40 | 1174575 | 4849377 | 29422 | 947 | 96.881689 | 33.281689 |
1 | Alaska | I | 2014 | AkGovBillWalker | SeanParnellAK | 50.899471 | 46.40 | 243597 | 736732 | 985 | 6198 | 13.712933 | 37.186538 |
2 | Arizona | R | 2015 | dougducey | FredDuVal | 56.256572 | 41.60 | 1492915 | 6731484 | 9586 | 6428 | 59.860122 | 3.603550 |
3 | Arkansas | R | 2015 | AsaHutchinson | MikeRossUpdates | 57.172343 | 41.50 | 844442 | 2966369 | 6761 | 3513 | 65.806891 | 8.634549 |
4 | California | D | 2011 | JerryBrownGov | neelkashkari | 59.400000 | 40.60 | 6496307 | 38802500 | 1046725 | 6253 | 99.406160 | 40.006160 |
5 | Colorado | D | 2011 | hickforco | bobbeauprez | 51.521511 | 46.20 | 1992332 | 5355866 | 41798 | 3602 | 92.066079 | 40.544568 |
6 | Connecticut | D | 2011 | GovMalloyOffice | TomFoleyCT | 51.263903 | 48.20 | 1081315 | 3596677 | 45679 | 3126 | 93.594919 | 42.331016 |
7 | Florida | R | 2011 | FLGovScott | CharlieCrist | 50.577125 | 47.10 | 5940898 | 19893297 | 53922 | 24326 | 68.911665 | 18.334541 |
8 | Georgia | R | 2011 | GovernorDeal | carter4governor | 54.098361 | 44.80 | 2539788 | 10097343 | 20108 | 8687 | 69.831568 | 15.733207 |
9 | Hawaii | D | 2014 | GovHawaii | DukeAiona2014 | 57.159353 | 37.10 | 366125 | 1419561 | 599 | 4558 | 11.615280 | 45.544073 |
10 | Idaho | R | 2007 | ButchOtter | AJYoureALiberal | 58.089034 | 38.60 | 439609 | 1634464 | 10652 | 100 | 99.069940 | 40.980907 |
11 | Illinois | R | 2015 | BruceRauner | GovernorQuinn | 52.533609 | 45.90 | 3508302 | 12880580 | 19654 | 20236 | 49.270494 | 3.263115 |
12 | Indiana | R | 2013 | GovPenceIN | GreggForGov | 51.666667 | 46.40 | 2556910 | 6596855 | 56825 | 3164 | 94.725700 | 43.059033 |
13 | Iowa | R | 2011 | TerryBranstad | Senator_Hatch | 61.307054 | 37.30 | 1125055 | 3107126 | 14696 | 2419 | 85.866199 | 24.559145 |
14 | Kansas | R | 2011 | govsambrownback | PaulDavisKS | 52.029136 | 46.10 | 847988 | 2904021 | 11728 | 5714 | 67.239995 | 15.210859 |
15 | Kentucky | D | 2007 | GovSteveBeshear | williamsfarmer | 61.208791 | 35.30 | 833139 | 4413457 | 20110 | 522 | 97.469950 | 36.261158 |
16 | Louisiana | R | 2008 | BobbyJindal | Hollis4Governor | 78.632887 | 17.88 | 1023163 | 4649676 | 174123 | 47 | 99.973015 | 21.340128 |
17 | Maine | R | 2011 | Governor_LePage | Michaud2014 | 52.677596 | 43.30 | 609963 | 1330089 | 4111 | 2496 | 62.221886 | 9.544290 |
18 | Maryland | R | 2015 | ChangeMaryland | BrownforMD | 52.385787 | 46.90 | 1641419 | 5976407 | 5465 | 3428 | 61.452828 | 9.067041 |
19 | Massachusetts | R | 2015 | MassGovernor | marthacoakley | 50.998948 | 46.60 | 2149380 | 6745408 | 117246 | 27312 | 81.106545 | 30.107597 |
20 | Michigan | R | 2011 | onetoughnerd | MarkSchauer | 52.147239 | 46.80 | 3137941 | 9909877 | 42558 | 5620 | 88.334925 | 36.187685 |
21 | Mississippi | R | 2012 | PhilBryantMS | dupreeforgov | 61.000000 | 39.00 | 893468 | 2994079 | 18261 | 584 | 96.901035 | 35.901035 |
22 | Missouri | D | 2009 | GovJayNixon | spenceformo | 56.217883 | 42.60 | 2715818 | 6063589 | 22524 | 2089 | 91.512615 | 35.294732 |
23 | Montana | D | 2013 | GovernorBullock | RickHill2012 | 50.882658 | 47.30 | 479264 | 1023579 | 5564 | 375 | 93.685806 | 42.803147 |
24 | Nebraska | R | 2015 | GovRicketts | Vote4Chuck | 59.689119 | 38.90 | 524068 | 1881503 | 740 | 2169 | 25.438295 | 34.250824 |
25 | Nevada | R | 2011 | GovSandoval | Goodman4Nevada | 74.708995 | 23.90 | 547582 | 2839099 | 11356 | 58 | 99.491852 | 24.782857 |
26 | New Hampshire | D | 2013 | GovernorHassan | Walt4NH | 52.600000 | 47.40 | 485534 | 1326813 | 8379 | 641 | 92.893570 | 40.293570 |
27 | New Jersey | R | 2010 | GovChristie | SenatorBuono | 61.319797 | 38.10 | 2073642 | 8938175 | 505401 | 3598 | 99.293122 | 37.973325 |
28 | New Mexico | R | 2011 | Gov_Martinez | GaryKingforGov | 57.300000 | 42.70 | 503096 | 2085572 | 13874 | 1877 | 88.083296 | 30.783296 |
29 | New York | D | 2011 | NYGovCuomo | RobAstorino | 57.082452 | 40.60 | 1829387 | 19746227 | 145605 | 11491 | 92.685364 | 35.602912 |
30 | North Carolina | R | 2013 | PatMcCroryNC | WalterDalton | 55.873340 | 43.20 | 4474892 | 9943964 | 27965 | 1667 | 94.374325 | 38.500985 |
31 | North Dakota | R | 2010 | NDGovDalrymple | TaylorforND | 64.820513 | 34.30 | 316336 | 739482 | 332 | 594 | 35.853132 | 28.967381 |
32 | Ohio | R | 2011 | JohnKasich | FitzGeraldForOH | 65.977249 | 32.90 | 3011052 | 11594163 | 67029 | 5905 | 91.903639 | 25.926390 |
33 | Oklahoma | R | 2011 | GovMaryFallin | JoeD4OK | 57.644628 | 41.00 | 823267 | 3878051 | 20880 | 6721 | 75.649433 | 18.004805 |
34 | Oregon | D | 2015 | GovKitz | DRichardsonOR | 52.698413 | 44.70 | 1342101 | 3970239 | 20809 | 1446 | 93.502584 | 40.804171 |
35 | Pennsylvania | D | 2015 | WolfForPA | CorbettforGov | 54.900000 | 45.10 | 3459375 | 12787209 | 10187 | 7131 | 58.823190 | 3.923190 |
36 | Rhode Island | D | 2015 | GinaRaimondo | MayorFung | 52.918288 | 36.30 | 322324 | 1055173 | 11006 | 4354 | 71.653646 | 18.735358 |
37 | South Carolina | R | 2011 | nikkihaley | vincentsheheen | 57.494867 | 41.40 | 1231757 | 4832482 | 98004 | 4078 | 96.005172 | 38.510306 |
38 | South Dakota | R | 2011 | SDGovDaugaard | SusanForSD | 73.514077 | 25.40 | 277248 | 853175 | 7142 | 981 | 87.923181 | 14.409104 |
39 | Texas | R | 2015 | GregAbbott_TX | WendyDavisTexas | 60.386965 | 38.90 | 4707388 | 26956958 | 58374 | 171906 | 25.349140 | 35.037825 |
40 | Utah | R | 2009 | GovHerbert | PeterSCooke | 71.175858 | 27.70 | 913696 | 2942902 | 13827 | 630 | 95.642249 | 24.466391 |
41 | Vermont | D | 2011 | GovPeterShumlin | MilneForVT | 50.599782 | 45.30 | 193603 | 626562 | 13126 | 299 | 97.772812 | 47.173030 |
42 | Virginia | D | 2014 | GovernorVA | KenCuccinelli | 51.336898 | 45.50 | 2222840 | 8326289 | 22611 | 18351 | 55.199941 | 3.863043 |
43 | Washington | D | 2013 | GovInslee | robmckenna | 51.200000 | 48.80 | 2851938 | 7061530 | 15530 | 5917 | 72.411060 | 21.211060 |
44 | West Virginia | D | 2010 | GovTomblin | MaloneyforWV | 52.494802 | 45.70 | 656034 | 1850326 | 6038 | 1006 | 85.718342 | 33.223539 |
45 | Wisconsin | R | 2011 | GovWalker | Burke4WI | 52.881699 | 46.60 | 2408096 | 5757564 | 100413 | 11232 | 89.939541 | 37.057842 |
46 | Wyoming | R | 2011 | GovMattMead | gosar4gov | 68.380744 | 28.90 | 156914 | 584153 | 1341 | 367 | 78.512881 | 10.132137 |
# Choropleth of each governor's Twitter follower share (%), written to
# govtw_share.html and embedded below through an iframe.
states = folium.Map(zoom_start=4, location=[40, -99])
states.geo_json(
    geo_path=state_geo,
    key_on='feature.properties.name',
    data=df,
    columns=['state', 'twshare'],
    data_out='govtw_share.json',
    threshold_scale=[50, 60, 70, 80, 90, 99],
    legend_name='Incumbent Governors Twitter Follower Share (%)',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
)
states.create_map(path='govtw_share.html')
HTML('<iframe src="govtw_share.html" '
     'style="width: 100%; height: 510px; border: none"></iframe>')
# Choropleth of the absolute gap between vote share and Twitter follower
# share; `mapname` names both the JSON data dump and the rendered HTML page.
mapname = 'share_diff'
states = folium.Map(zoom_start=4, location=[40, -99])
states.geo_json(
    geo_path=state_geo,
    key_on='feature.properties.name',
    data=df,
    columns=['state', 'shareDiff'],
    data_out=mapname + '.json',
    threshold_scale=[5, 10, 15, 20, 30, 40],
    legend_name='The Difference Between Governors Vote Share & Twitter Follower Share (%)',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
)
states.create_map(path=mapname + '.html')
HTML('<iframe src="{}.html" style="width: 100%; height: 510px; border: none"></iframe>'.format(mapname))
# Line plot of Twitter share and vote share with one x tick per state.
# The state names are made the index before plotting: DataFrame.plot expects
# `x` to be a column label or position, and current pandas rejects a Series
# passed there.
ax = df.set_index('state')[['twshare', 'shareGov']].plot(
    xticks=range(len(df)), rot=75, figsize=(15, 3))
# Legend entries follow the column order selected above.
ax.legend(['Twitter share', 'Vote share'], loc='best');
# Correlation matrix (DataFrame.corr with its default method) between the
# governors' Twitter follower share and their 'normalized' vote share.
share_cols = ['twshare', 'shareGov']
df[share_cols].corr()
twshare | shareGov | |
---|---|---|
twshare | 1.00000 | 0.19558 |
shareGov | 0.19558 | 1.00000 |
# Scatter of Twitter follower share (x axis) against vote share (y axis),
# with every point labelled by its state.
ax = df.plot(x='twshare', y='shareGov', kind='scatter', figsize=(15, 15),
             xlim=(0, 100), ylim=(50, 80))
# A plain loop is used because annotating is purely a side effect.
for _, row in df.iterrows():
    ax.annotate(row['state'], (row['twshare'], row['shareGov']),
                xytext=(-40, 7), textcoords='offset points', fontsize=14)
# Axis labels match the plotted columns: twshare on x, shareGov on y.
ax.set_xlabel("Normalized Twitter Follower Share of Sitting Governor", fontsize=14)
ax.set_ylabel("Normalized Vote Share of Sitting Governor", fontsize=14)
ax.set_title('Twitter Follower Share vs Vote Share of Sitting US Governors', fontsize=18)
# Off-diagonal entry of the 2x2 correlation matrix. Positional .iloc is used
# because the .ix indexer is no longer available in current pandas.
share_corr = df[['twshare', 'shareGov']].corr().iloc[0, 1]
ax.text(0.01, 0.99,
        'Correlation between Twitter share and vote share : ' +
        '{:2.2f}'.format(share_corr),
        horizontalalignment='left', verticalalignment='top',
        transform=ax.transAxes, fontsize=12);
# Complete-linkage hierarchical clustering of the states in
# (twshare, shareGov) space, drawn as a horizontal dendrogram.
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist, squareform
# Condensed vector of pairwise Euclidean distances between states.
condensed = pdist(df[['twshare', 'shareGov']], metric='euclidean')
# Square distance matrix, kept under its original name for inspection.
distxy = squareform(condensed)
ax = plt.subplot(111)
ax.figure.set_size_inches(5, 10)
# linkage() treats a 2-D array as raw observation vectors, so it is given the
# condensed distances. Passing the square matrix would cluster the rows of the
# distance matrix instead of the states themselves.
l = linkage(condensed, method='complete')
dendrogram(l, labels=df['state'].tolist(), orientation='right');
# Rows where the incumbent governor has fewer followers than the challenger.
challenger_leads = df['folgov'] < df['folch']
df[challenger_leads]
state | party | since | twgov | twch | shareGov | shareCh | voterCnt | folgov | folch | twshare | shareDiff | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | Alaska | I | 2014 | AkGovBillWalker | SeanParnellAK | 50.899471 | 46.4 | 243597 | 985 | 6198 | 13.712933 | 37.186538 |
9 | Hawaii | D | 2014 | GovHawaii | DukeAiona2014 | 57.159353 | 37.1 | 366125 | 599 | 4558 | 11.615280 | 45.544073 |
11 | Illinois | R | 2015 | BruceRauner | GovernorQuinn | 52.533609 | 45.9 | 3508302 | 19654 | 20236 | 49.270494 | 3.263115 |
24 | Nebraska | R | 2015 | GovRicketts | Vote4Chuck | 59.689119 | 38.9 | 524068 | 740 | 2169 | 25.438295 | 34.250824 |
31 | North Dakota | R | 2010 | NDGovDalrymple | TaylorforND | 64.820513 | 34.3 | 316336 | 332 | 594 | 35.853132 | 28.967381 |
39 | Texas | R | 2015 | GregAbbott_TX | WendyDavisTexas | 60.386965 | 38.9 | 4707388 | 58374 | 171906 | 25.349140 | 35.037825 |
import json
from collections import Counter
# Tally follower locations over every entry of the geocoded data.
#   c  : per location, the sum of the values stored under 'flocs'
#   c2 : per location, the number of entries whose 'flocs' contain it
# (presumably 'flocs' maps a "lat,lon" string to a follower count for one
# governor's state; verify against the file that produces geocoded.json)
# The file is opened in a `with` block so the handle is closed after parsing.
with open('../data/geocoded.json') as geo_file:
    f = json.load(geo_file)
c2 = Counter()
c = Counter()
for state in f:
    flocs = f[state]['flocs']
    c2.update(flocs.keys())
    c.update(flocs)
c2.most_common(10)
[('33.528370,-112.076300', 50), ('38.880344,-77.108260', 50), ('38.913611,-77.013222', 50), ('39.307956,-76.617016', 49), ('39.998012,-75.144793', 49), ('39.726287,-104.965486', 48), ('38.816242,-77.071282', 48), ('42.321597,-71.089115', 48), ('33.759506,-84.403176', 48), ('40.441419,-79.977292', 47)]
# Rank plot: the per-location counts held in c2, sorted in descending order
# and drawn on linear axes.
govs_ranked = sorted(c2.values(), reverse=True)
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(range(len(govs_ranked)), govs_ranked)
# Pad the x range and leave one unit of headroom above the largest count.
ax.set_xlim(-100, len(c2) + 100)
ax.set_ylim(0, govs_ranked[0] + 1)
ax.set_xlabel('Cities (ranked)')
ax.set_ylabel('Unique governors followed')
ax.set_title('Number of Unique Governors Followed by US cities');
# Ranked per-location counts from c2 on log-log axes.
ranked_counts = sorted(c2.values(), reverse=True)
plt.loglog(range(len(ranked_counts)), ranked_counts);
# Fit the per-location totals in c with the powerlaw package and print the
# fit's alpha and sigma attributes.
import powerlaw
fit = powerlaw.Fit(list(c.values()))
print(fit.alpha, fit.sigma)
#print(fit.distribution_compare('power_law', 'lognormal'))
1.99584757388 0.0346080955597
Calculating best minimal value for power law fit
# Rank plot of the per-location totals held in c, with a logarithmic y axis.
totals_ranked = sorted(c.values(), reverse=True)
fig, ax = plt.subplots(figsize=(16, 8))
ax.scatter(range(len(totals_ranked)), totals_ranked)
ax.set_yscale('log')
# Pad the x range; the y range starts just under 1 so single-count locations
# stay visible on the log scale, with headroom above the largest total.
ax.set_xlim(-100, len(c) + 100)
ax.set_ylim(0.9, totals_ranked[0] + 5000)
ax.set_xlabel('Cities (ranked)')
ax.set_ylabel('Residents following at least one governor')
ax.set_title('Total Number of Residents Following at least one Governor');
# Reverse-geocode the ten locations with the highest counts in c2.
# http://code.xster.net/pygeocoder/wiki/Home
from pygeocoder import Geocoder
# most_common() is evaluated once, outside the loop. Iterating its result
# also copes with fewer than ten locations.
for latlon, n_govs in c2.most_common(10):
    # Keys are "lat,lon" strings.
    lat, lon = latlon.split(',')
    results = Geocoder.reverse_geocode(float(lat), float(lon))
    print(results, '\t # of govs followed:', n_govs)
116 West Rose Lane, Phoenix, AZ 85013, USA # of govs followed: 50 4075 Wilson Boulevard, Arlington, VA 22203, USA # of govs followed: 50 139 Randolph Place Northwest, Washington, DC 20001, USA # of govs followed: 50 North Charles Street & Interstate 83, Baltimore, MD 21201, USA # of govs followed: 49 3023 North 9th Street, Philadelphia, PA 19133, USA # of govs followed: 49 636 Williams Street, Denver, CO 80218, USA # of govs followed: 48 2729 King Street, Alexandria, VA 22302, USA # of govs followed: 48 146 Dale Street, Boston, MA 02119, USA # of govs followed: 48 182 Northside Drive Northwest, Atlanta, GA 30314, USA # of govs followed: 48 Elmore Street, Pittsburgh, PA 15219, USA # of govs followed: 47
# Circle-marker map of the c2 counts: one marker per location key, with a
# radius of ten times the count stored for it.
states = folium.Map(tiles='Mapbox Bright', zoom_start=4, location=[40, -99])
for latlon, n_govs in c2.items():
    # Keys are "lat,lon" strings; the split parts are passed on as-is.
    states.circle_marker(location=latlon.split(','), radius=n_govs * 10)
states.create_map(path='unique_govs.html')
HTML('<iframe src="unique_govs.html" '
     'style="width: 100%; height: 500px; border: none"></iframe>')
import json
from collections import Counter
# Reload the geocoded data and rebuild c: per location, the sum of the values
# stored under 'flocs' across all entries.
# The file is opened in a `with` block so the handle is closed after parsing.
with open('../data/geocoded.json') as geo_file:
    f = json.load(geo_file)
c = Counter()
for state in f:
    c.update(f[state]['flocs'])
# Reverse-geocode the ten locations with the highest totals in c.
# http://code.xster.net/pygeocoder/wiki/Home
from pygeocoder import Geocoder
# most_common() is evaluated once, outside the loop. Iterating its result
# also copes with fewer than ten locations.
for latlon, n_followers in c.most_common(10):
    # Keys are "lat,lon" strings.
    lat, lon = latlon.split(',')
    results = Geocoder.reverse_geocode(float(lat), float(lon))
    print(results, '\t # of followers:', n_followers)
139 Randolph Place Northwest, Washington, DC 20001, USA # of followers: 26485 146 Dale Street, Boston, MA 02119, USA # of followers: 14453 182 Northside Drive Northwest, Atlanta, GA 30314, USA # of followers: 8384 636 Williams Street, Denver, CO 80218, USA # of followers: 8379 1017 West 31st Street, Austin, TX 78705, USA # of followers: 6633 2801 South Leavitt Street, Chicago, IL 60608, USA # of followers: 6432 2513 East Burnside Street, Portland, OR 97214, USA # of followers: 5856 Buffalo Bayou Bike Trail, Houston, TX 77019, USA # of followers: 5521 1816 Portland Avenue, Minneapolis, MN 55404, USA # of followers: 4804 3814 Munger Avenue, Dallas, TX 75204, USA # of followers: 4674
# Circle-marker map of the totals in c: one marker per location key, with the
# stored total used directly as the radius.
states = folium.Map(tiles='Mapbox Bright', zoom_start=4, location=[40, -99])
for latlon, n_followers in c.items():
    # Keys are "lat,lon" strings; the split parts are passed on as-is.
    states.circle_marker(location=latlon.split(','), radius=n_followers)
states.create_map(path='all_followers.html')
HTML('<iframe src="all_followers.html" '
     'style="width: 100%; height: 500px; border: none"></iframe>')
from mpl_toolkits.basemap import Basemap
from matplotlib.path import Path

# Point-in-polygon check carried out in projected (Mercator) map coordinates.
# http://matplotlib.org/basemap/users/merc.html
m = Basemap(projection='merc', resolution='c', lat_ts=20,
            llcrnrlat=-80, urcrnrlat=80,
            llcrnrlon=-180, urcrnrlon=180)

# Polygon vertices, apparently written as [lat, lon] pairs.
p = [[25.774252, -80.190262], [18.466465, -66.118292], [32.321384, -64.75737]]
# The map object is called with the second element first (lon, lat order),
# so each pair is swapped on the way in.
p_projected = [m(lon, lat) for lat, lon in p]
# Path built from the projected vertices.
p_path = Path(p_projected)

# Two test points, in the same order as the vertices, projected the same way.
p1 = [27.254629577800088, -76.728515625]
p2 = [27.254629577800088, -74.928515625]
p1_projected = m(p1[1], p1[0])
p2_projected = m(p2[1], p2[0])

# Report, for each projected test point, whether the path contains it.
for projected in (p1_projected, p2_projected):
    print(p_path.contains_point(projected))
1 1