import pandas as pd
import numpy as np
import os
!pip install jellyfish
# Output: Requirement already satisfied: jellyfish in /opt/conda/lib/python3.6/site-packages (0.7.1)
import jellyfish
# All data files are located here. Change if running.
dataDir = r'C:\Users\Ian\Dropbox\ECON 407 - US Courts & Judges'

# Build the judge attribute table in a single pipeline:
#   1. read the raw CSV from the current working directory
#      (alternative using dataDir:
#       pd.read_csv(os.path.join(dataDir, 'Judge Attribute Data.csv'))),
#   2. drop the columns that are not used further down,
#   3. give the remaining raw columns readable names.
# NOTE(review): 'liable' is listed twice in the drop list below; the list is
# reproduced as-is.
judge_att_data = (
    pd.read_csv('Judge Attribute Data.csv')
    .drop(columns=[
        'name_original', '___l', '___j', '___char', 'elevate', 'dcother',
        'liable', 'dummy', 'religion', 'circuit',
        'songer_code', 'amon', 'crossl', 'pred', 'appt', 'temp',
        'trans', 'liable', 'abamin', 'dsenate', 'rsenate', 'dhouse',
        'rhouse', 'fhouse', 'fsenate', 'drhouse', 'drsenate',
        'whouse', 'wsenate', 'nrhouse', 'nrsenate', 'dsens', 'rsens',
        'yeari', 'yearc', 'e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'congresi',
        'unity', 'e7', 'e8', 'yearo', 'congreso', 'unityo', 'cityb',
        'badeg', 'bastate', 'bastatus', 'jddeg', 'jdstate', 'jdstatus',
        'grad1', 'grad2', 'tperm', 'fsens', 'drsens', 'wsens', 'nrsens',
        'osens', 'agego', 'service', 'csb', 'ba', 'bast', 'bapp', 'ls',
        'lsst', 'jdpp', 'graddeg1', 'graddeg2', 'statecab', 'state2',
        'recdate', 'ageon',
    ])
    .rename(columns={
        'name': 'Name',
        'circuit_original': 'Circuit',
        'id': 'ID',
        'pres': 'Appointing President',
        'yearl': 'Year of Departure',
        'yearb': 'Year of Birth',
        'yeard': 'Year of Death',
        'pleft': 'President when Departed',
        'left': 'Reason for Departing',
        'party': 'Judge Party',
        'district': 'District',
        'state': 'State',
        'city': 'City',
        'gender': 'Gender',
        'race': 'Race',
        'ayear': 'Year of Appointment',
        'crossa': 'Cross Appointment',
        'recess': 'Recess Appointment',
        'aba': 'ABA Rating',
        'assets': 'Assets',
        'congress': 'Congress',
        'unityi': 'Unity',
        'hdem': 'House Democrats',
        'hrep': 'House Republicans',
        'sdem': 'Senate Democrats',
        'srep': 'Senate Republicans',
        'hother': 'House Independents',
        'sother': 'Senate Independents',
        'networth': 'Net Worth',
        'appres': 'Appointing President Party',
    })
)
# Replace zero values with missing for net worth and assets
def replace_zero_with_na(x):
    """Map a zero value to NaN and pass every other value through unchanged.

    Intended for element-wise use (e.g. via ``Series.apply``) on columns
    where 0 is treated as a missing value.
    """
    return np.nan if x == 0 else x
# Treat a recorded value of 0 as missing in both monetary columns.
for money_col in ('Assets', 'Net Worth'):
    judge_att_data[money_col] = judge_att_data[money_col].apply(replace_zero_with_na)
# Turn the position indicator columns into dummies and rename
def turn_into_dummy(val):
    """Return 0 when ``val`` is missing and 1 otherwise.

    Used element-wise to turn a raw position column (missing = position not
    held) into a 0/1 indicator.

    ``pd.isna`` is used instead of ``np.isnan`` because ``np.isnan`` raises
    ``TypeError`` for non-float scalars such as ``None`` or strings, which
    appear whenever a column is read with object dtype. For float input the
    result is the same as before.
    """
    return 0 if pd.isna(val) else 1
# Every column whose name still starts with a lowercase 'p' at this point is
# treated as a previous-position indicator.
position_columns = [col for col in judge_att_data.columns if col[0] == 'p']

# Convert each indicator to 0/1 ...
for col in position_columns:
    judge_att_data[col] = judge_att_data[col].apply(turn_into_dummy)

# ... then swap the leading 'p' for a readable prefix on all of them at once.
judge_att_data = judge_att_data.rename(
    columns={col: 'Previous Position - ' + col[1:] for col in position_columns}
)
# Indicator columns for previously held elected offices.
political_positions = [
    'Previous Position - house',
    'Previous Position - senate',
    'Previous Position - gov',
    'Previous Position - ssenate',
    'Previous Position - shouse',
    'Previous Position - mayor',
    'Previous Position - ccoun',
]

# "Politician" is 1 when the judge held at least one of the elected offices
# above, otherwise 0. The inputs are the 0/1 dummies built earlier, so the
# row-wise maximum is exactly that flag.
judge_att_data["Politician"] = judge_att_data[political_positions].max(axis=1)

# Judge's age at the time of appointment, in whole calendar years.
judge_att_data["Age When Appointed"] = (
    judge_att_data["Year of Appointment"] - judge_att_data["Year of Birth"]
)
# Load the ideology scores from the current working directory, keep only the
# judge name and score, and rename them to match the attribute table.
# (Alternative using dataDir:
#  pd.read_excel(os.path.join(dataDir, 'Judge Ideology Scores.xlsx')))
judge_ideo_score = (
    pd.read_excel('Judge Ideology Scores.xlsx')
    .loc[:, ['judgename', 'ideology_score']]
    .rename(columns={'judgename': 'Name', 'ideology_score': 'Ideology Score'})
)
def get_best_name_match_from_list(name, data_list, threshold=0.89):
    """Return the entry of ``data_list`` most similar to ``name``.

    Similarity is the Jaro-Winkler score computed by ``jellyfish``.

    Parameters
    ----------
    name : str
        Name to look up. Non-string values (e.g. NaN or None from a missing
        cell) never match and yield ``""``.
    data_list : iterable
        Candidate names. Non-string entries are skipped.
    threshold : float, optional
        A candidate is only accepted when its score is strictly greater than
        this value. Defaults to 0.89.

    Returns
    -------
    str
        The best-scoring candidate above ``threshold`` (the first one wins on
        ties), or ``""`` when no candidate qualifies.
    """
    # A missing name cannot be matched; bail out before calling jellyfish,
    # which is understood to reject non-string input with a TypeError.
    if not isinstance(name, str):
        return ""

    # Newer jellyfish releases expose the metric as jaro_winkler_similarity
    # and dropped the old jaro_winkler name; fall back for old installs.
    similarity = getattr(jellyfish, "jaro_winkler_similarity", None)
    if similarity is None:
        similarity = jellyfish.jaro_winkler

    best_match = ""
    highest_jw = 0
    for potential_match in data_list:
        if not isinstance(potential_match, str):
            continue
        current_score = similarity(potential_match, name)
        if current_score > highest_jw and current_score > threshold:
            highest_jw = current_score
            best_match = potential_match
    return best_match
# For every judge, find the closest-matching name in the ideology table
# ("" when nothing is similar enough).
judge_att_data['Closest Name'] = judge_att_data['Name'].apply(
    get_best_name_match_from_list, args=(judge_ideo_score['Name'],)
)

# Left-join the ideology scores on the matched name, then remove the helper
# column and the duplicate name column produced by the merge suffixes.
judge_att_data = (
    judge_att_data
    .merge(judge_ideo_score, left_on='Closest Name', right_on='Name', how='left')
    .drop(columns=['Name_y', 'Closest Name'])
    .rename(columns={'Name_x': 'Name'})
)

# Write the combined table to the current working directory.
# (Alternative using dataDir:
#  judge_att_data.to_csv(os.path.join(dataDir, 'Judge Attribute and Ideology.csv'), index = False))
judge_att_data.to_csv('Judge Attribute and Ideology.csv', index=False)