#!/usr/bin/env python
# coding: utf-8

# # Election data example
# 
# By Ben Welsh
# 
# How you can use Python and pandas to work with California elections data [published by](https://www.californiacivicdata.org/2017/10/31/processed-files/) the California Civic Data Coalition.

# In[10]:


import pandas as pd


# In[11]:


# Render matplotlib charts inline. `get_ipython` is only defined when this
# runs under IPython/Jupyter.
get_ipython().run_line_magic("matplotlib", "inline")


# ### Read in data from the Coalition's API

# In[12]:


candidates = pd.read_csv(
    "https://calaccess.download/latest/Candidates.csv"
)


# In[13]:


# Preview the first five rows.
candidates.head(5)


# ### How many candidates?

# In[14]:


# Count the rows in the table.
candidates.shape[0]


# ### How has the total changed over time?

# Cast the election date column into the datetime data type.

# In[15]:


candidates["election_date"] = pd.to_datetime(candidates["election_date"])


# Group the elections into two-year cycles to simplify the analysis.

# In[16]:


def get_cycle(dt):
    """Return the even-numbered year that closes the two-year cycle of *dt*.

    Even years are returned unchanged; odd years are rolled forward by one.

    Args:
        dt: Any object exposing a numeric ``year`` attribute.

    Returns:
        ``dt.year`` when it is even, otherwise ``dt.year + 1``.
    """
    year = dt.year
    # A non-zero remainder means an odd year, which belongs to the next cycle.
    return year + 1 if year % 2 else year


# In[17]:


# Tag every row with the cycle year computed from its election date.
candidates["election_year"] = candidates["election_date"].apply(get_cycle)


# Group by year

# In[18]:


# size() counts the rows in each cycle; reset_index() turns the result back
# into a DataFrame whose count column is labelled 0.
by_year = (
    candidates
    .groupby("election_year")
    .size()
    .reset_index()
)


# Chart the results

# In[19]:


by_year.set_index("election_year").plot(kind="bar", figsize=(20, 10))


# ### How about the different parties? How have their totals changed over time?

# Group by party

# In[20]:


# Count the rows for every (party, cycle) pair. reset_index() leaves the count
# in a column labelled 0.
by_party = candidates.groupby(["party_name", "election_year"]).size().reset_index()


# Pivot it into a crosstab

# In[21]:


# Selecting the single count column (0) before unstacking yields flat
# party-name columns instead of a two-level column header.
party_crosstab = (
    by_party.set_index(["election_year", "party_name"])[0]
    .unstack("party_name")
    .fillna(0)  # a party with no rows in a cycle counts as zero
)


# Clean up the crosstab so it's closer to a simple DataFrame

# In[22]:


# The election year stays as the index under a blank name, and the leftover
# "party_name" label is removed from the column header.
party_crosstab = party_crosstab.rename_axis("").rename_axis(None, axis=1)


# Chart each party

# In[23]:


# One subplot per party, all sharing the same y-axis so they are comparable.
party_crosstab.plot.bar(
    subplots=True,
    figsize=(20, 30),
    legend=False,
    sharey=True,
    ylim=(0, 500)
)


# ### How do the two major parties compare to the third parties?

# Create a new column that groups the third parties together

# In[24]:


# Keep the two major parties as-is; every other value (including a missing
# party name) is labelled THIRD.
candidates['party_type'] = candidates.party_name.apply(
    lambda x: x if x in ['DEMOCRATIC', 'REPUBLICAN'] else 'THIRD'
)


# Regroup the data with that column

# In[25]:


# Count the rows for every (party type, cycle) pair. reset_index() leaves the
# count in a column labelled 0.
by_party_type = candidates.groupby(["party_type", "election_year"]).size().reset_index()


# Again, pivot it to a crosstab

# In[26]:


# Selecting the single count column (0) before unstacking yields flat
# party-type columns instead of a two-level column header.
party_type_crosstab = (
    by_party_type.set_index(["election_year", "party_type"])[0]
    .unstack("party_type")
    .fillna(0)  # a group with no rows in a cycle counts as zero
)


# Clean up the crosstab

# In[27]:


# Name the index YEAR, remove the "party_type" label from the column header,
# then move YEAR into an ordinary column. Columns keep the labels found in the
# data, so nothing is relabelled by position.
party_type_crosstab = (
    party_type_crosstab.rename_axis("YEAR")
    .rename_axis(None, axis=1)
    .reset_index()
)


# Chart the results

# In[28]:


party_type_crosstab.set_index("YEAR").plot.bar(
    subplots=True,
    figsize=(20, 15),
    legend=False,
    sharey=True,
    ylim=(0, 500)
)


# Print out the data so we can eyeball it as well.

# In[29]:


party_type_crosstab


# ### How has the gap between the GOP and the Dems changed?

# Calculate a new column with the gap

# In[30]:


# Positive values mean more DEMOCRATIC than REPUBLICAN rows in that cycle.
party_type_crosstab["DEM_VS_GOP"] = (
    party_type_crosstab.DEMOCRATIC - party_type_crosstab.REPUBLICAN
)


# Chart it

# In[35]:


party_type_crosstab.plot(
    kind="bar",
    x="YEAR",
    y="DEM_VS_GOP",
    legend=False,
)


# Inspect it

# In[31]:


party_type_crosstab