# pyscience imports
import numpy as np
import pandas as pd
import janitor
import pandas_flavor as pf
# viz
import matplotlib.pyplot as plt
import seaborn as sns
# Global plot styling: seaborn theme first, then the matplotlib font and the
# default figure size.
sns.set(context="talk", style="ticks")
font = dict(family='IBM Plex Sans', weight='normal', size=8)
plt.rc('font', **font)
plt.rcParams.update({'figure.figsize': (10, 10)})
# geodata packages
import geopandas as gpd
import geoplot as gplt
import mapclassify as mc
# show all output: ask IPython to display the value of every top-level
# expression in a cell rather than only the last one (which is why cells
# below show several outputs, e.g. both `.shape` and `.head()`)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# State boundary geometries, read directly from a GeoJSON URL into a
# GeoDataFrame; preview the first rows.
states_geojson = (
    "https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_500k.json"
)
states = gpd.read_file(states_geojson)
states.head(5)
GEO_ID | STATE | NAME | LSAD | CENSUSAREA | geometry | |
---|---|---|---|---|---|---|
0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | |
1 | 0400000US25 | 25 | Massachusetts | 7800.058 | MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... | |
2 | 0400000US26 | 26 | Michigan | 56538.901 | MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... | |
3 | 0400000US30 | 30 | Montana | 145545.801 | POLYGON ((-104.05770 44.99743, -104.25015 44.9... | |
4 | 0400000US32 | 32 | Nevada | 109781.180 | POLYGON ((-114.05060 37.00040, -114.04999 36.9... |
# Preview the first rows in alphabetical order of state name.
states.sort_values(by='NAME').head(5)
GEO_ID | STATE | NAME | LSAD | CENSUSAREA | geometry | |
---|---|---|---|---|---|---|
18 | 0400000US01 | 01 | Alabama | 50645.326 | MULTIPOLYGON (((-85.00237 31.00068, -85.02411 ... | |
19 | 0400000US02 | 02 | Alaska | 570640.950 | MULTIPOLYGON (((-164.97620 54.13459, -164.9377... | |
20 | 0400000US04 | 04 | Arizona | 113594.084 | POLYGON ((-109.04522 36.99908, -109.04524 36.9... | |
21 | 0400000US05 | 05 | Arkansas | 52035.477 | POLYGON ((-94.55929 36.49950, -94.51948 36.499... | |
22 | 0400000US06 | 06 | California | 155779.220 | MULTIPOLYGON (((-122.44632 37.86105, -122.4385... |
# State-level presidential returns, 1976-2016.
# Source: https://doi.org/10.7910/DVN/42MVDX
# Dataverse does not expose a direct download link, so the CSV is read from a
# local copy; adjust the path below when running on another machine.
prez_csv_path = "/home/alal/Dropbox/_Data/US/USPresidentialElections/1976-2016-president.csv"
raw_prez = pd.read_csv(prez_csv_path)
# NOTE(review): `clean_names` is presumably the DataFrame method registered by
# `import janitor` -- confirm.
state_lev_prez = raw_prez.clean_names()
state_lev_prez.head()
state_lev_prez.info()
year | state | state_po | state_fips | state_cen | state_ic | office | candidate | party | writein | candidatevotes | totalvotes | version | notes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1976 | Alabama | AL | 1 | 63 | 41 | US President | Carter, Jimmy | democrat | False | 659170 | 1182850 | 20171015 | NaN |
1 | 1976 | Alabama | AL | 1 | 63 | 41 | US President | Ford, Gerald | republican | False | 504070 | 1182850 | 20171015 | NaN |
2 | 1976 | Alabama | AL | 1 | 63 | 41 | US President | Maddox, Lester | american independent party | False | 9198 | 1182850 | 20171015 | NaN |
3 | 1976 | Alabama | AL | 1 | 63 | 41 | US President | Bubar, Benjamin ""Ben"" | prohibition | False | 6669 | 1182850 | 20171015 | NaN |
4 | 1976 | Alabama | AL | 1 | 63 | 41 | US President | Hall, Gus | communist party use | False | 1954 | 1182850 | 20171015 | NaN |
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3740 entries, 0 to 3739 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 3740 non-null int64 1 state 3740 non-null object 2 state_po 3740 non-null object 3 state_fips 3740 non-null int64 4 state_cen 3740 non-null int64 5 state_ic 3740 non-null int64 6 office 3740 non-null object 7 candidate 3474 non-null object 8 party 3404 non-null object 9 writein 3740 non-null bool 10 candidatevotes 3740 non-null int64 11 totalvotes 3740 non-null int64 12 version 3740 non-null int64 13 notes 0 non-null float64 dtypes: bool(1), float64(1), int64(7), object(5) memory usage: 383.6+ KB
# Keep only the 2016 rows and the columns needed for the state-level maps.
keep_cols = ['state', 'state_po', 'state_fips', 'party', 'candidatevotes']
is_2016 = state_lev_prez['year'] == 2016
elec_2016 = state_lev_prez.loc[is_2016, keep_cols]
elec_2016.shape
elec_2016.head()
(345, 5)
state | state_po | state_fips | party | candidatevotes | |
---|---|---|---|---|---|
3395 | Alabama | AL | 1 | republican | 1318255 |
3396 | Alabama | AL | 1 | democrat | 729547 |
3397 | Alabama | AL | 1 | libertarian | 44467 |
3398 | Alabama | AL | 1 | NaN | 21712 |
3399 | Alabama | AL | 1 | green | 9391 |
# Restrict to the two major parties and drop the postal abbreviation.
major_parties = ['republican', 'democrat']
two_party_rows = elec_2016['party'].isin(major_parties)
elec_2016_2p = elec_2016.loc[
    two_party_rows, ['state', 'state_fips', 'party', 'candidatevotes']
]
elec_2016_2p.head()
state | state_fips | party | candidatevotes | |
---|---|---|---|---|
3395 | Alabama | 1 | republican | 1318255 |
3396 | Alabama | 1 | democrat | 729547 |
3400 | Alaska | 2 | republican | 163387 |
3401 | Alaska | 2 | democrat | 116454 |
3407 | Arizona | 4 | republican | 1252401 |
# Reshape long -> wide: one row per state, one vote column per party.
# Votes are summed within (state, fips, party) before pivoting so that any
# repeated party rows for a state collapse into a single total.
index_keys = ['state', 'state_fips', 'party']
elec_2016_2p.set_index(index_keys, inplace=True)
party_totals = elec_2016_2p.groupby(level=index_keys).sum()
votes_wide = party_totals.unstack(level='party').reset_index()
votes_wide.head()
state | state_fips | candidatevotes | ||
---|---|---|---|---|
party | democrat | republican | ||
0 | Alabama | 1 | 729547 | 1318255 |
1 | Alaska | 2 | 116454 | 163387 |
2 | Arizona | 4 | 1161209 | 1252401 |
3 | Arkansas | 5 | 380494 | 684872 |
4 | California | 6 | 8753788 | 4483810 |
# Replace the two-level column index left by unstack() with flat names.
# The order must match the existing columns: state, fips, democrat, republican.
flat_names = ['state', 'fips', 'd_votes', 'r_votes']
votes_wide.columns = flat_names
votes_wide.head()
state | fips | d_votes | r_votes | |
---|---|---|---|---|
0 | Alabama | 1 | 729547 | 1318255 |
1 | Alaska | 2 | 116454 | 163387 |
2 | Arizona | 4 | 1161209 | 1252401 |
3 | Arkansas | 5 | 380494 | 684872 |
4 | California | 6 | 8753788 | 4483810 |
# Two-party vote shares and the Republican margin (r_share - d_share),
# added to votes_wide in place.
votes_wide['denom'] = votes_wide['d_votes'] + votes_wide['r_votes']
votes_wide['r_share'] = votes_wide['r_votes'] / votes_wide['denom']
votes_wide['d_share'] = votes_wide['d_votes'] / votes_wide['denom']
votes_wide['r_margin'] = votes_wide['r_share'] - votes_wide['d_share']
votes_wide.head()
state | fips | d_votes | r_votes | denom | r_share | d_share | r_margin | |
---|---|---|---|---|---|---|---|---|
0 | Alabama | 1 | 729547 | 1318255 | 2047802 | 0.643741 | 0.356259 | 0.287483 |
1 | Alaska | 2 | 116454 | 163387 | 279841 | 0.583857 | 0.416143 | 0.167713 |
2 | Arizona | 4 | 1161209 | 1252401 | 2413610 | 0.518891 | 0.481109 | 0.037782 |
3 | Arkansas | 5 | 380494 | 684872 | 1065366 | 0.642851 | 0.357149 | 0.285703 |
4 | California | 6 | 8753788 | 4483810 | 13237598 | 0.338718 | 0.661282 | -0.322564 |
# Electoral College votes keyed by state FIPS code; the CSV has no header
# row, so column names are supplied here.
ec_votes_url = "https://raw.githubusercontent.com/PitchInteractiveInc/tilegrams/master/data/us/electoral-college-votes-by-state.csv"
ec_by_state = pd.read_csv(ec_votes_url, header=None, names=['fips', 'EC_votes'])
ec_by_state
fips | EC_votes | |
---|---|---|
0 | 1 | 9 |
1 | 2 | 3 |
2 | 4 | 11 |
3 | 5 | 6 |
4 | 6 | 55 |
5 | 8 | 9 |
6 | 9 | 7 |
7 | 10 | 3 |
8 | 11 | 3 |
9 | 12 | 29 |
10 | 13 | 16 |
11 | 15 | 4 |
12 | 16 | 4 |
13 | 17 | 20 |
14 | 18 | 11 |
15 | 19 | 6 |
16 | 20 | 6 |
17 | 21 | 8 |
18 | 22 | 8 |
19 | 23 | 4 |
20 | 24 | 10 |
21 | 25 | 11 |
22 | 26 | 16 |
23 | 27 | 10 |
24 | 28 | 6 |
25 | 29 | 10 |
26 | 30 | 3 |
27 | 31 | 5 |
28 | 32 | 6 |
29 | 33 | 4 |
30 | 34 | 14 |
31 | 35 | 5 |
32 | 36 | 29 |
33 | 37 | 15 |
34 | 38 | 3 |
35 | 39 | 18 |
36 | 40 | 7 |
37 | 41 | 7 |
38 | 42 | 20 |
39 | 44 | 4 |
40 | 45 | 9 |
41 | 46 | 3 |
42 | 47 | 11 |
43 | 48 | 38 |
44 | 49 | 6 |
45 | 50 | 3 |
46 | 51 | 13 |
47 | 53 | 12 |
48 | 54 | 5 |
49 | 55 | 10 |
50 | 56 | 3 |
# Attach vote shares (matched on state name) and Electoral College votes
# (matched on FIPS code) to the state geometries. Both merges use the default
# inner join, so rows without a match on either side are dropped.
geo_df = (
    states
    .merge(votes_wide, left_on="NAME", right_on="state")
    .merge(ec_by_state, on='fips')
)
geo_df.head()
GEO_ID | STATE | NAME | LSAD | CENSUSAREA | geometry | state | fips | d_votes | r_votes | denom | r_share | d_share | r_margin | EC_votes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0400000US23 | 23 | Maine | 30842.923 | MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... | Maine | 23 | 357735 | 335593 | 693328 | 0.484032 | 0.515968 | -0.031936 | 4 | |
1 | 0400000US25 | 25 | Massachusetts | 7800.058 | MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... | Massachusetts | 25 | 1995196 | 1090893 | 3086089 | 0.353487 | 0.646513 | -0.293026 | 11 | |
2 | 0400000US26 | 26 | Michigan | 56538.901 | MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... | Michigan | 26 | 2268839 | 2279543 | 4548382 | 0.501177 | 0.498823 | 0.002353 | 16 | |
3 | 0400000US30 | 30 | Montana | 145545.801 | POLYGON ((-104.05770 44.99743, -104.25015 44.9... | Montana | 30 | 177709 | 279240 | 456949 | 0.611097 | 0.388903 | 0.222193 | 3 | |
4 | 0400000US32 | 32 | Nevada | 109781.180 | POLYGON ((-114.05060 37.00040, -114.04999 36.9... | Nevada | 32 | 539260 | 512058 | 1051318 | 0.487063 | 0.512937 | -0.025874 | 6 |
# Sanity check: list which state names survived the merges.
a = geo_df['NAME'].unique()
print(sorted(a))
['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']
# Winner label per state ("R" only for a strictly positive margin), each
# state's share of all Electoral College votes, and the margin scaled by
# that share.
geo_df['R_or_D'] = np.where(geo_df['r_margin'].gt(0), "R", "D")
geo_df['EC_Weight'] = geo_df['EC_votes'].div(geo_df['EC_votes'].sum())
geo_df['weighted_r_margin'] = geo_df['r_margin'].mul(geo_df['EC_Weight'])
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.colors as colors

# 2x2 grid of choropleths for the 2016 result (Alaska and Hawaii omitted):
#   [0][0] winner dummy, [0][1] R vote share,
#   [1][0] R margin,     [1][1] R margin weighted by Electoral College share.
# Colour scales span the full min/max of each variable, centred on the value
# that separates the two parties (0.5 for share, 0 for margins).

# The 'seaborn-white' style name was deprecated in matplotlib 3.6 in favour of
# 'seaborn-v0_8-white' and later removed; use whichever name is available.
white_style = ('seaborn-v0_8-white'
               if 'seaborn-v0_8-white' in plt.style.available
               else 'seaborn-white')
plt.style.use(white_style)

# DivergingNorm was renamed TwoSlopeNorm in matplotlib 3.2 and the old name
# later removed; resolve the class once so the cell runs on either version.
two_slope_norm = getattr(colors, 'TwoSlopeNorm', None) or colors.DivergingNorm

# Every panel draws the same subset, so filter once.
mainland = geo_df.loc[~geo_df.state.isin(['Alaska', "Hawaii"])]

f, ax = plt.subplots(2, 2, figsize = (15, 13), dpi = 150)
# dummy
mainland.plot(column = 'R_or_D',
              edgecolor = 'k', ax = ax[0][0], cmap = "bwr")
ax[0][0].set_axis_off()
ax[0][0].set_title("R Win Dummy")
##############################################################
# r vote share
##############################################################
vmin, vmax, vcenter = geo_df.r_share.min(), geo_df.r_share.max(), 0.5
divnorm = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
# create a normalized colorbar
# NOTE(review): `cbar` is not used by any code visible here, and it uses
# 'RdBu' while the panel below uses 'RdBu_r' -- confirm whether it is needed.
cbar = plt.cm.ScalarMappable(norm=divnorm, cmap='RdBu')
# r vote share
mainland.plot(column = 'r_share',
              edgecolor = 'k', ax = ax[0][1], cmap = "RdBu_r", norm=divnorm,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[0][1].set_title("R Vote Share")
ax[0][1].set_axis_off()
##############################################################
# margin
##############################################################
vmin, vmax, vcenter = geo_df.r_margin.min(), geo_df.r_margin.max(), 0
divnorm2 = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
mainland.plot(column = 'r_margin',
              edgecolor = 'k', ax = ax[1][0], cmap = "bwr", norm = divnorm2,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[1][0].set_title("R Margin")
ax[1][0].set_axis_off()
##############################################################
# r margin weighted by EC vote share
##############################################################
vmin, vmax, vcenter = geo_df.weighted_r_margin.min(), geo_df.weighted_r_margin.max(), 0
divnorm2 = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
mainland.plot(column = 'weighted_r_margin',
              edgecolor = 'k', ax = ax[1][1], cmap = "bwr", norm = divnorm2,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[1][1].set_title("R Margin weighted by EC Votes")
ax[1][1].set_axis_off()
f.suptitle('Choropleth Maps for the 2016 Presidential Election', fontsize = 20)
f.tight_layout()
# leave headroom for the suptitle, which tight_layout does not account for
f.subplots_adjust(top=0.88)
# plt.subplots_adjust(wspace=0, hspace=0)
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4f1332cd0>
Text(0.5, 1, 'R Win Dummy')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4f0dd8750>
Text(0.5, 1, 'R Vote Share')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4f0d67c10>
Text(0.5, 1, 'R Margin')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4f0d17850>
Text(0.5, 1, 'R Margin weighted by EC Votes')
Text(0.5, 0.98, 'Choropleth Maps for the 2016 Presidential Election')
# Largest absolute margin in either direction: the half-width of a colour
# scale that is symmetric about zero.
bound = geo_df['r_margin'].abs().max()
-bound
+bound
-0.9139037668370817
0.9139037668370817
# Same 2x2 grid of choropleths as the previous figure (Alaska and Hawaii
# omitted), but with colour limits taken from the 5th/95th percentiles so a
# few extreme states do not compress the scale. The two margin panels use
# limits of -lb/+lb so the scale is symmetric about zero.

# DivergingNorm was renamed TwoSlopeNorm in matplotlib 3.2 and the old name
# later removed; resolve the class once so the cell runs on either version.
two_slope_norm = getattr(colors, 'TwoSlopeNorm', None) or colors.DivergingNorm

# Every panel draws the same subset, so filter once.
mainland = geo_df.loc[~geo_df.state.isin(['Alaska', "Hawaii"])]

f, ax = plt.subplots(2, 2, figsize = (15, 13), dpi = 150)
# dummy
mainland.plot(column = 'R_or_D',
              edgecolor = 'k', ax = ax[0][0], cmap = "bwr")
ax[0][0].set_axis_off()
ax[0][0].set_title("R Win Dummy")
##############################################################
# r vote share
##############################################################
vmin, vmax, vcenter = geo_df.r_share.quantile(0.05), geo_df.r_share.quantile(0.95), 0.5
divnorm = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
# create a normalized colorbar
# NOTE(review): `cbar` is not used by any code visible here, and it uses
# 'RdBu' while the panel below uses 'RdBu_r' -- confirm whether it is needed.
cbar = plt.cm.ScalarMappable(norm=divnorm, cmap='RdBu')
# r vote share
mainland.plot(column = 'r_share',
              edgecolor = 'k', ax = ax[0][1], cmap = "RdBu_r", norm=divnorm,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[0][1].set_title("R Vote Share")
ax[0][1].set_axis_off()
##############################################################
# margin
##############################################################
# symmetric limits: the larger of the two tail quantiles in absolute value
lb = max(abs(geo_df.r_margin.quantile(0.05)), abs(geo_df.r_margin.quantile(0.95)))
vmin, vmax, vcenter = -lb, +lb, 0
divnorm2 = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
mainland.plot(column = 'r_margin',
              edgecolor = 'k', ax = ax[1][0], cmap = "bwr", norm = divnorm2,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[1][0].set_title("R Margin")
ax[1][0].set_axis_off()
##############################################################
# r margin weighted by EC vote share
##############################################################
lb = max(abs(geo_df.weighted_r_margin.quantile(0.05)), abs(geo_df.weighted_r_margin.quantile(0.95)))
vmin, vmax, vcenter = -lb, +lb, 0
divnorm2 = two_slope_norm(vmin=vmin, vcenter=vcenter, vmax=vmax)
mainland.plot(column = 'weighted_r_margin',
              edgecolor = 'k', ax = ax[1][1], cmap = "bwr", norm = divnorm2,
              legend = True, legend_kwds={'orientation': "horizontal"})
ax[1][1].set_title("R Margin weighted by EC Votes")
ax[1][1].set_axis_off()
f.suptitle('Choropleth Maps for the 2016 Presidential Election \n symmetric colormaps for margins', fontsize = 20)
f.tight_layout()
# leave headroom for the two-line suptitle, which tight_layout does not account for
f.subplots_adjust(top=0.88)
# plt.subplots_adjust(wspace=0, hspace=0)
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4d55d3cd0>
Text(0.5, 1, 'R Win Dummy')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4d2070b50>
Text(0.5, 1, 'R Vote Share')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4d202bed0>
Text(0.5, 1, 'R Margin')
<matplotlib.axes._subplots.AxesSubplot at 0x7fd4d1fdab90>
Text(0.5, 1, 'R Margin weighted by EC Votes')
Text(0.5, 0.98, 'Choropleth Maps for the 2016 Presidential Election \n symmetric colormaps for margins')