Created by SmirkyGraphs. Code: GitHub. Source: RI BOE.
using pandas to get voter turnout for each race and by precinct
# NOTE
# Replaced BALLOT TOTAL with page 1 ballot totals
# before 2016 in these precincts with multiple pages
# prior to 2016 each page was counted as 1 ballot
# leading to well over 100% turnout which I don't
# think was true in reality.
import pandas as pd
import glob as glob
files = glob.glob('./data/raw/*.xls')
rename = {'City/Town':'Precinct Name', 'Precinct': 'Precinct Name', 'Election Day': 'E Day',
'Mail Ballots': 'Mail', 'Contest': 'Office Title', 'Contest Title': 'Office Title',
'Candidate Name': 'Candidate', 'Precinct No.': 'Precinct #'}
cols = ['Precinct Name', 'Precinct #', 'Total', 'E Day', 'Mail', 'Office Title']
contest_frame = []
turnout_frame = []
for f in files:
df = pd.read_excel(f, converters={'Precinct #': '{:0>4}'.format})
df = df.rename(columns=rename)
df = df[cols]
# cleaning data
df['Precinct Name'] = df['Precinct Name'].str.strip()
df = df[~df['Precinct Name'].str.contains('Limited')]
df = df[~df['Precinct Name'].str.contains('Federal')]
df = df[~df['Precinct Name'].str.contains('President')]
df = df[~df['Office Title'].str.contains('Page')]
# registered voters by precinct
registered_voters = df[df['Office Title'] == 'REGISTERED VOTERS - TOTAL']
columns = ['E Day', 'Mail', 'Office Title']
registered_voters = registered_voters.drop(columns=columns)
registered_voters = registered_voters.rename(columns = {'Total': 'registered_voters'})
registered_voters = registered_voters.reset_index(drop=True)
# ballots cast by precinct
ballots_cast = df[df['Office Title'] == 'BALLOTS CAST - TOTAL']
columns = ['E Day', 'Mail', 'Office Title']
ballots_cast = ballots_cast.drop(columns=columns)
ballots_cast = ballots_cast.rename(columns = {'Total': 'ballots_cast'})
ballots_cast = ballots_cast.reset_index(drop=True)
# mail in voters by precinct
mail_in = df[df['Office Title'] == 'BALLOTS CAST - TOTAL']
columns = ['E Day', 'Total', 'Office Title']
mail_in = mail_in.drop(columns=columns)
mail_in = mail_in.rename(columns = {'Mail': 'mail_in_ballots'})
mail_in = mail_in.reset_index(drop=True)
# merging the 2 datasets
voter_table = registered_voters.merge(ballots_cast)
voter_table = voter_table.merge(mail_in)
# getting total votes by precinct
contest_votes = df.groupby(['Office Title', 'Precinct Name'])['Total'].sum()
contest_votes = pd.DataFrame(contest_votes)
contest_votes = contest_votes.reset_index()
# combining all datasets and getting turnout
contest_votes = contest_votes.merge(voter_table, how='left', on='Precinct Name')
contest_votes = contest_votes.groupby(['Office Title'])['registered_voters', 'ballots_cast', 'mail_in_ballots'].sum()
contest_votes['turnout'] = contest_votes['ballots_cast']/contest_votes['registered_voters']
voter_table['turnout'] = voter_table['ballots_cast']/voter_table['registered_voters']
voter_table['percent_mail'] = voter_table['mail_in_ballots']/voter_table['ballots_cast']
# adding tag of year/cycle
contest_votes['election_type_year'] = f[7:-4]
voter_table['election_type_year'] = f[7:-4]
# adding looped cleaned data to frame lists
contest_frame.append(contest_votes)
turnout_frame.append(voter_table)
contest_frame = pd.concat(contest_frame, sort=False)
turnout_frame = pd.concat(turnout_frame, sort=False)
turnout_frame.to_csv('./data/clean/precinct_turnout.csv', index=False)
contest_frame.to_csv('./data/clean/contest_turnout.csv', index=False)