#!/usr/bin/env python
# coding: utf-8

# # Election data example
#
# By Ben Welsh
#
# How you can use Python and pandas to work with California elections data
# [published by](https://www.californiacivicdata.org/2017/10/31/processed-files/)
# the California Civic Data Coalition.

# In[10]:

import pandas as pd

# In[11]:

# Script form of the `%matplotlib inline` notebook magic.
get_ipython().run_line_magic('matplotlib', 'inline')

# ### Read in data from the Coalition's API

# In[12]:

candidates = pd.read_csv("https://calaccess.download/latest/Candidates.csv")

# In[13]:

candidates.head()

# ### How many candidates?

# In[14]:

len(candidates)

# ### How has the total changed over time?

# Cast the election date column into the datetime data type.

# In[15]:

candidates["election_date"] = pd.to_datetime(candidates["election_date"])

# Group the elections into two-year cycles to simplify the analysis.

# In[16]:


def get_cycle(dt):
    """Return the election cycle year for *dt*.

    An even year is returned unchanged; an odd year is rolled forward to
    the next (even) year, so every date lands in a two-year cycle that is
    labelled by its even year.
    """
    # `year % 2` is 0 for even years and 1 for odd ones.
    return dt.year + dt.year % 2


# In[17]:

candidates["election_year"] = candidates["election_date"].apply(get_cycle)

# Group by year

# In[18]:

by_year = (
    candidates
    .groupby("election_year")
    .size()
    .reset_index()
)

# Chart the results

# In[19]:

by_year.set_index("election_year").plot.bar(figsize=(20, 10))

# ### How about the different parties? How have their totals changed over time?
# Group by party

# In[20]:

by_party = candidates.groupby(["party_name", "election_year"]).size().reset_index()

# Transpose it into a crosstab

# In[21]:

# `by_party` holds the group sizes in a column labelled 0. Selecting that
# column as a Series before unstacking produces flat columns (one per party),
# so no two-level column index has to be flattened afterwards.
party_crosstab = (
    by_party.set_index(["election_year", "party_name"])[0]
    .unstack("party_name")
    .fillna(0)  # a party with no candidates in a cycle counts as zero
)

# Clean up the crosstab so it's closer to a simple DataFrame

# In[22]:

# Remove the "party_name" label from the column axis. The election year
# stays as the index so it is used for the x-axis of the chart below.
party_crosstab = party_crosstab.rename_axis(None, axis=1)

# Chart each party

# In[23]:

party_crosstab.plot.bar(
    subplots=True,
    figsize=(20, 30),
    legend=False,
    sharey=True,
    ylim=(0, 500),
)

# ### How do the two major parties compare to the third parties?

# Create a new column that groups the third parties together

# In[24]:

# Anything that is not one of the two major party names is labelled 'THIRD'.
candidates['party_type'] = candidates.party_name.apply(
    lambda x: x if x in ['DEMOCRATIC', 'REPUBLICAN'] else 'THIRD'
)

# Regroup the data with that column

# In[25]:

by_party_type = candidates.groupby(["party_type", "election_year"]).size().reset_index()

# Again, transpose it to a crosstab

# In[26]:

party_type_crosstab = (
    by_party_type.set_index(["election_year", "party_type"])[0]
    .unstack("party_type")
    # Select the three columns by name and in a fixed order; a group that
    # is absent from the data becomes an all-zero column.
    .reindex(columns=["DEMOCRATIC", "REPUBLICAN", "THIRD"])
    .fillna(0)
)

# Clean up the crosstab

# In[27]:

# End result: a default integer index and the columns
# YEAR, DEMOCRATIC, REPUBLICAN, THIRD.
party_type_crosstab = (
    party_type_crosstab.rename_axis("YEAR")
    .rename_axis(None, axis=1)
    .reset_index()
)

# Chart the results

# In[28]:

party_type_crosstab.set_index("YEAR").plot.bar(
    subplots=True,
    figsize=(20, 15),
    legend=False,
    sharey=True,
    ylim=(0, 500),
)

# Print out the data so we can eyeball it as well.

# In[29]:

party_type_crosstab

# ### How has the gap between the GOP and the Dems changed?
# Calculate a new column with the gap

# In[30]:

# Positive values mean more Democratic than Republican candidates that cycle.
party_type_crosstab["DEM_VS_GOP"] = party_type_crosstab["DEMOCRATIC"].sub(
    party_type_crosstab["REPUBLICAN"]
)

# Chart it

# In[35]:

party_type_crosstab.plot.bar(
    x="YEAR",
    y="DEM_VS_GOP",
    legend=False,
)

# Inspect it

# In[31]:

party_type_crosstab