#!/usr/bin/env python # coding: utf-8 # __________ # ## Advisory! # ** This is the stripped down `dw-nominate` notebook.
# If you're new to Python or want thorough documentation, please view the `dw-nominate-detail` notebook.** # ____________ # # DW-Nominate Exploration # # The Nominate scoring scale was first developed by Keith T. Poole and Howard Rosenthal in the late 1980's.
# Since then, it has undergone several iterations, with the DW-series being the latest.
# Scores are derived from roll call votes, and contain 2 dimensions:
# 1. Allowing us to place Senators, House members, and their political orgs on the liberal-conservative [-1, 1] spectrum (1st dimension).
# 2. Nominate also quantifies the opposition/support of civil rights for underrepresented minorities (2nd dimension).
#
# Read more about these metrics on the Voteview website.
#
# The following notebook is going to build off a visualization made by the Pew Research Institute, by making a gif of changes of house ideology across congresses.
# Here's the finished product:
#
# GIF hosted on Github.
# We're going to
# 1. Read a fixed-width text file from an anonymous ftp hosted on the web.
# 2. Use Pandas dataframes to filter, replace, and aggregate data.
# 3. Plot data using Panda's Matplotlib extension.
# 4. Generate a GIF out of static png files.

# In[1]:

# Notebook-only magic: `get_ipython` exists only when run under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

import os
import glob

import us
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import imageio

# where the raw files are hosted:

# In[2]:

senate_dl = 'ftp://k7moa.com/junkord/SL01113D21_BSSE.dat'
house_dl = 'ftp://k7moa.com/junkord/HL01113D21_BSSE.DAT'

# We can store these links in a list of tuples, later we'll iterate through `args` to download the correct files.

# In[3]:

# Each entry is (chamber label, download URL).
args = [('senate', senate_dl), ('house', house_dl)]

# In[4]:

for arg in args:
    print(arg[1])

# columns copied and pasted from voteview docs.

# In[5]:

# One column name per line; the literal is split on newlines into a list.
cols = '''Congress Number
ICPSR ID Number
State Code
Congressional District Number
State Name
Party Code
Name
1st Dimension Coordinate
2nd Dimension Coordinate
1st Dimension Bootstrapped Standard Error
2nd Dimension Bootstrapped Standard Error
Correlation Between 1st and 2nd Dimension
Log-Likelihood
Number of Votes
Number of Classification Errors
Geometric Mean Probability'''.split('\n')

# In[91]:

"We split the columns by a newline character (\n), to get {} columns.".format(len(cols))

# In[7]:

# (start, end) character offsets of each fixed-width field.
# NOTE(review): this list has 15 specs while `cols` above has 16 names, so the
# last name ('Geometric Mean Probability') has no field of its own. Recent
# pandas releases are believed to reject mismatched `names`/`colspecs` in
# `read_fwf` -- TODO confirm the width of the final field against the data
# file and add the missing spec.
col_widths = [(0, 4), (4, 10), (10, 13), (13, 15), (15, 23), (23, 28),
              (28, 40), (40, 50), (50, 60), (60, 70), (70, 80), (80, 90),
              (90, 102), (102, 107), (107, 112)]

# In[77]:

# This dict is for compat.
# with Propublica Congress API.
col_mapping = {'Name': 'last_name', 'Congress Number': 'Congress'}

# dict comprehension to convert state names to abbreviations.
# Keys are the upper-cased state names cut to their first 7 characters.
state_dict = {state.name.upper()[:7]: state.abbr for state in us.states.STATES}

# In[70]:

df = pd.read_fwf(house_dl, names=cols, colspecs=col_widths)

# In[87]:

df.head(3)

# Let's read both files into one dataframe.

# In[14]:

# Build one frame per chamber and concatenate once at the end.
# (DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.)
chamber_frames = []
for chamber in args:
    df = pd.read_fwf(chamber[1], names=cols, colspecs=col_widths)

    # replace column names from col_mapping dict
    df.columns = [col_mapping.get(col, col) for col in df.columns]

    # create column for senate or house
    df['chamber'] = chamber[0]

    # convert state names to state id.
    df['state'] = df['State Name'].replace(state_dict)

    chamber_frames.append(df)

df_congress = pd.concat(chamber_frames, ignore_index=True)

# In[15]:

df_congress.chamber.unique()

# let's set some variables for the next graph.

# In[54]:

first_congress_yr = 1789
colors = ['b', 'r']
annotation = ('Ideology score from 1st Dimensional Coordinate of DW-Nominate\n'
              'Source: voteview.com/dwnomin.htm\nAuthor: @leonyin')
# Doubles as the x-axis label: the padding spreads the two captions apart.
col_name = 'More Liberal' + (57 * ' ') + 'More Conservative'

# Per-chamber axis settings: y range, major/minor tick spacing and y label.
house_plot = {
    'ylim': [0, 80],
    'y_maj': 20,
    'y_min': 10,
    'ylabel': '# House Reps'
}

senate_plot = {
    'ylim': [0, 24],
    'y_maj': 8,
    'y_min': 4,
    'ylabel': '# Senators'
}

# In[36]:

# Party codes that get their own series in the plot.
_PARTY_LABELS = {100: 'D', 200: 'R'}


def get_nominal(x):
    '''
    Returns the English ordinal suffix ('st', 'nd', 'rd' or 'th')
    for a congress number.

    Numbers whose last two digits are 11, 12 or 13 take 'th'
    (11th, 112th); otherwise the last digit decides.
    '''
    if 11 <= x % 100 <= 13:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(x % 10, 'th')


def rep_dem_indie(row):
    '''
    Maps the 'Party Code' of a Pandas row to 'D' (code 100) or 'R'
    (code 200). Any other code returns None.
    '''
    return _PARTY_LABELS.get(row['Party Code'])


# In[139]:

def plot_polarity(congress, chamber='senate', how='area',
                  col='1st Dimension Coordinate'):
    '''
    Plot the liberal-conservative polarity for a congress for
    either house of reps or senate and save it as a png.

    congress : congress number used to filter `df_congress`.
    chamber  : 'house' selects the house axis settings; any other
               value uses the senate settings. Also used to filter
               `df_congress` and to build the output path.
    how      : plot kind passed to the Pandas plot call.
    col      : column to plot, rounded to one decimal place.
               The default metric plotted is the 1st dim coordinate.

    The figure is written to figs/<chamber>/<congress>.png; the
    directory is created if it does not exist yet.

    Raises ValueError when no rows match the congress and chamber.
    '''
    plot_vars = house_plot if chamber == 'house' else senate_plot

    # Set variables for title
    congress_year = first_congress_yr + (2 * congress) - 2
    congress_nominal = str(congress) + get_nominal(congress)

    # create supertitle and subtitle.
    sup_title = '{} Ideology'.format(chamber.title())
    title = '{} Congress {}-{}'.format(congress_nominal,
                                       congress_year,
                                       congress_year + 1)

    # filter the df to the correct congress and chamber.
    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of df_congress.
    selected = ((df_congress['Congress'] == congress) &
                (df_congress['chamber'] == chamber))
    df_c = df_congress[selected].copy()
    if df_c.empty:
        raise ValueError(
            'no rows for congress {} in chamber {!r}'.format(congress, chamber))

    # fix up the party names (codes other than 100/200 become missing).
    df_c['Party'] = df_c['Party Code'].map(_PARTY_LABELS)

    # round ideology dimension.
    df_c[col_name] = df_c[col].round(1)

    # this groupby creates a multi-index,
    # to remove one index, we use the unstack() function,
    # reverting Party back into a column
    df2 = (df_c.groupby(['Party', col_name])
               .size()
               .unstack('Party')
               .fillna(0)
               # guarantee both series exist even if a party has no members
               .reindex(columns=['D', 'R'], fill_value=0))

    # below is all matplotlib functions.
    fig = plt.figure()
    fig.suptitle(sup_title, fontsize=17)
    ax = fig.add_subplot(111)

    # plot the one line per party
    df2.plot(kind=how, stacked=False, color=colors, alpha=.46,
             xlim=[-1, 1], ylim=plot_vars['ylim'], ax=ax, title=title)

    # add y label
    ax.set_ylabel(plot_vars['ylabel'])

    # vertical line at x = 0
    ax.axvline(0, color='k', linestyle='dotted')

    # plot legend
    ax.legend(loc='upper right', frameon=False)

    # tailor x and y axis ticks
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(.1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(plot_vars['y_maj']))
    ax.yaxis.set_minor_locator(ticker.MultipleLocator(plot_vars['y_min']))
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    # write annotations
    ax.annotate(annotation, (0, 0), (0, -32),
                xycoords='axes fraction',
                textcoords='offset points',
                va='top')

    fig.subplots_adjust(top=0.86)

    # save the figure in as a png
    out_dir = 'figs/{}'.format(chamber)
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig('figs/{}/{}.png'.format(chamber, congress),
                bbox_inches='tight', dpi=100)


# In[140]:
plot_polarity(87, chamber='house', how='area')

# We can do this for many congresses, and use the images to generate a nice gif.
# For this visualization, I chose to begin at the 87th congress, while JFK was in office.

# In[141]:

def _frame_order(path):
    '''
    Sort key for frame files: numeric file names first, in numeric
    order (so 87.png precedes 100.png), then any other names
    alphabetically.
    '''
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.isdigit():
        return (0, int(stem), '')
    return (1, 0, stem)


def make_gif(congress, congress_start, congress_end):
    '''
    Saves a png for each congress into the figs subdirectory.
    Uses ImageIO to combine images into a gif.
    Deletes all png files in directory.

    congress       : despite its name, this is the chamber label; it is
                     passed to plot_polarity as `chamber` and names the
                     figs/<congress>/ directory.
    congress_start : first congress number to plot.
    congress_end   : stop value of the range; it is NOT plotted itself.

    Raises ValueError when there are no png frames to combine.
    '''
    for i in range(congress_start, congress_end):
        plot_polarity(i, chamber=congress, how='area')

    # glob returns names in arbitrary order; sort numerically so the
    # frames play in chronological order.
    filenames = sorted(glob.glob('figs/{}/*.png'.format(congress)),
                       key=_frame_order)
    if not filenames:
        raise ValueError('no png frames found in figs/{}/'.format(congress))

    images = [imageio.imread(filename) for filename in filenames]

    kwargs = {'duration': .23}
    imageio.mimsave('figs/{}/movie.gif'.format(congress), images, **kwargs)

    # Remove the frames only once the gif has been written.
    for filename in filenames:
        os.remove(filename)


# In[144]:

make_gif(congress='house', congress_start=87, congress_end=114)

# Here's the finished product:
#
# GIF hosted on Github.
# Notice that values on the liberal spectrum are almost never less than -0.7.
# Also notice the gradual polarization between the two parties. # This is the beginning of what we're going to use the Nominate dataset for.
# In the next notebook, we'll examine how we can extend the use of the Nominate dataset to the Propublica Congress API.