# -*- coding: utf-8 -*-
%matplotlib inline
import pandas as pd
import numpy as np
import math
import matplotlib as mplstyle
import matplotlib.pyplot as plt
import locale
from locale import atof
from dateutil import parser
from datetime import datetime
plt.style.use('fivethirtyeight')
df_vouchersBySchool = pd.read_csv('school_choice_data/imported_data/awards_by_school_16.csv')
df_specialEdEnrollment = pd.read_csv('school_choice_data/gov_data/EnrollmentEnglishLearnerSpecEdSchls_ALL.csv')
df_missingSpecialEdEnrollment = pd.read_csv('school_choice_data/imported_data/updatedEnrollment.csv')
df_vouchersByOgSchoolDistrict = pd.read_csv('school_choice_data/imported_data/voucher-students-by-original-school-district.csv')
df_publicSpecialEdEnrollment = pd.read_csv('school_choice_data/gov_data/PublicEnrollmentEnglishLearnerSpecEd.csv')
df_schoolGrades2015 = pd.read_csv('school_choice_data/imported_data/final-2016-af-school-grades.csv')
# replace symbols
# use 0 for no school choice students and 9 for schools with less than 10 students
def replaceSymbols(id):
if id == '*':
return 9
elif id == '-':
return 0
else:
return id
# normalize numbers
# if is not a number, return None. if is a number, turn it into an integer
def normalizeNums(num):
if (math.isnan(num)):
return None
else:
num = int(math.floor(num))
return num
# multiply by 100 to convert from percent
def convertPercent(num):
return num * 100
# normalize percent from x% to just a number
def normalizePerc(num):
try:
float(num.strip('%'))
except AttributeError:
return num
return float(num.strip('%'))
# get average award amount
# divide award amount by award count if they are both numbers
def averageAward(awardCount, awardAmount):
if math.isnan(awardAmount) or math.isnan(awardCount):
return None
return round((awardAmount/awardCount),2)
# Convert NaN to zeros
# the zeros do not always indicate that no students
# were in the program. schools with less than ten
# students do not have to report
def convertInt(num):
if math.isnan(num):
return 0
else:
return int(num)
# calculate percent
def calcPerc(num, denom):
if num != 0:
percent = (float(num/denom))*100
return round(percent,2)
else:
return None
# calculate percent change
def percentChange(oldVal, newVal):
val1 = float(oldVal)
val2 = float(newVal)
change = (val2-val1)/val1
return float(change)
# fill in missing values
# compares two values, takes the first if is not null. if it is, takes second.
def fillInfo(choiceCell, backupCell):
if pd.isnull(choiceCell):
return backupCell
elif math.isnan(choiceCell):
return backupCell
else:
return choiceCell
# replace with None
def fillNone(num):
if num == 1:
return None
else:
return num
Apply function to replace symbols. Drop data from all but most recent school year. Filter out schools with no voucher enrollment.
df_vouchersBySchool2016_filtered = df_vouchersBySchool
df_vouchersBySchool2016_filtered['Voucher enrollment 2016'] = df_vouchersBySchool2016_filtered['2016-2017'].apply(replaceSymbols)
# Trim out data from 2011-2015.
df_vouchersBySchool2016_trimmed = df_vouchersBySchool.drop(['2011-2012','2012-2013','2013-2014','2014-2015','2015-2016','2016-2017'], axis=1)
# Filter out schools with no voucher enrollment in 2016
# In the data, 1 == "-", and 9 == "Less than 10 voucher students"
# We want to filter out the "-" ones because they're basically zero.
df_vouchersBySchool2016_trimmed = df_vouchersBySchool2016_trimmed[df_vouchersBySchool2016_trimmed['Voucher enrollment 2016'] > 0]
# INDIANA SCHOOL ENROLLMENT (ALL)
# Filter out the public schools.
# Public school IDs are numbers-only. Private school IDs have letters + numbers.
def filterPrivate(id):
if pd.isnull(id):
return True
else:
try:
float(id)
except ValueError:
return False
return True
df_specialEdEnrollment_filtered = df_specialEdEnrollment
df_specialEdEnrollment_filtered['filter'] = df_specialEdEnrollment_filtered['SCHL_NAME_CODE'].apply(filterPrivate)
df_specialEdEnrollment_filtered = df_specialEdEnrollment_filtered[df_specialEdEnrollment_filtered['filter'] == False]
df_specialEdEnrollment_filtered = df_specialEdEnrollment_filtered.reset_index()
# Tidy and rename columns
df_specialEdEnrollment_trimmed = df_specialEdEnrollment_filtered.drop(['index','CORP','CORP_NAME','ENR_ELL_N','ENR_ELL_RATIO', 'filter'], axis=1)
df_specialEdEnrollment_trimmed = df_specialEdEnrollment_trimmed.rename(columns = {'SCHL_NAME_CODE':'School No.','SCHL_NAME':'School Name','ENR_SPEC_ED_N':'Special ed enrollment 2016','ENR_SPEC_ED_RATIO':'Special ed ratio 2016','ENROLLMENT_N':'Total enrollment 2016'})
df_specialEdEnrollment_trimmed['Special ed ratio 2016'] = df_specialEdEnrollment_trimmed['Special ed ratio 2016'].apply(normalizePerc)
df_specialEdEnrollment_trimmed
# Tidy and rename updated enrollment columns
df_missingSpecialEdEnrollment = df_missingSpecialEdEnrollment.rename(columns = {'School ID':'School No.','SpecEd 2016':'Special ed enrollment 2016','SpecEd Perc':'Special ed ratio 2016','Enrollment 2016':'Total enrollment 2016'})
df_missingSpecialEdEnrollment['Special ed ratio 2016'] = df_missingSpecialEdEnrollment['Special ed ratio 2016'].apply(normalizePerc)
df_missingSpecialEdEnrollment
# Merge w/ other special education data
df_specialEdEnrollment_merged = df_specialEdEnrollment_trimmed.merge(df_missingSpecialEdEnrollment,on='School No.', how='outer')
df_specialEdEnrollment_merged['Special ed enrollment 2016'] = np.vectorize(fillInfo)(df_specialEdEnrollment_merged['Special ed enrollment 2016_x'],df_specialEdEnrollment_merged['Special ed enrollment 2016_y'])
df_specialEdEnrollment_merged['Special ed ratio 2016'] = np.vectorize(fillInfo)(df_specialEdEnrollment_merged['Special ed ratio 2016_x'],df_specialEdEnrollment_merged['Special ed ratio 2016_y'])
df_specialEdEnrollment_merged['Total enrollment 2016'] = np.vectorize(fillInfo)(df_specialEdEnrollment_merged['Total enrollment 2016_x'],df_specialEdEnrollment_merged['Total enrollment 2016_y'])
df_specialEdEnrollment_merged = df_specialEdEnrollment_merged.drop(['Special ed enrollment 2016_x','Special ed enrollment 2016_y','Special ed ratio 2016_x','Special ed ratio 2016_y','Total enrollment 2016_x','Total enrollment 2016_y'], axis=1)
df_specialEdEnrollment_merged
# df_specialEdEnrollment_merged.to_csv('school_choice_data/exported_data/df_specialEdEnrollment_merged.csv')
# Merge w/ voucher school data
df_vouchersSpecialEd_merged = df_vouchersBySchool2016_trimmed.merge(df_specialEdEnrollment_merged,on='School No.', how='outer')
df_vouchersSpecialEd_merged = df_vouchersSpecialEd_merged.drop(['School Name_y'], axis=1)
df_vouchersSpecialEd_merged = df_vouchersSpecialEd_merged.rename(columns = {'School Name_x':'School Name'})
# df_vouchersSpecialEd_merged
# Associate schools with their current grade by merging grades df and df_vouchersSpecialEd_merged
# Tidy and rename columns
df_schoolGrades2015_trimmed = df_schoolGrades2015.drop(['IDOE_CORPORATION_ID','CORPORATION_NAME'], axis=1)
df_schoolGrades2015_trimmed = df_schoolGrades2015_trimmed.rename(columns = {'IDOE_SCHOOL_ID':'School No.','SCHOOL_NAME':'School Name','2015 Grade':'2015_grade'})
df_schoolGrades2015_trimmed
# Merge w/ voucher school data
df_vouchersSpecialEdGrades_merged = df_vouchersSpecialEd_merged.merge(df_schoolGrades2015_trimmed,on='School No.', how='outer')
df_vouchersSpecialEdGrades_merged = df_vouchersSpecialEdGrades_merged.drop(['School Name_y'], axis=1)
df_vouchersSpecialEdGrades_merged = df_vouchersSpecialEdGrades_merged.rename(columns = {'School Name_x':'School Name'})
df_vouchersSpecialEdGrades_merged
# Filter out all schools without vouchers in 2016
df_vouchersSpecialEdGrades_merged = df_vouchersSpecialEdGrades_merged[df_vouchersSpecialEdGrades_merged['Voucher enrollment 2016'] > 0]
# df_vouchersSpecialEdGrades_merged
# Assign schools to parent school districts
# Tidy and rename
df_vouchersByOgSchoolDistrict_filtered = df_vouchersByOgSchoolDistrict.rename(columns = {'Corp. No.':'District No.','Corporation Name':'District Name'})
df_vouchersByOgSchoolDistrict_filtered = df_vouchersByOgSchoolDistrict_filtered.drop(['Voucher students from this district (2016-2017)'], axis=1)
# df_vouchersByOgSchoolDistrict_filtered
# Merge w/ school ID
df_vouchersSpecialEdGradesDistricts_merged = df_vouchersSpecialEdGrades_merged.merge(df_vouchersByOgSchoolDistrict_filtered,on='School No.', how='outer')
df_vouchersSpecialEdGradesDistricts_merged = df_vouchersSpecialEdGradesDistricts_merged.drop(['School Name_y'], axis=1)
df_vouchersSpecialEdGradesDistricts_merged = df_vouchersSpecialEdGradesDistricts_merged.rename(columns = {'School Name_x':'School Name'})
df_vouchersSpecialEdGradesDistricts_merged
df_vouchersSpecialEdGradesDistricts_merged.to_csv('school_choice_data/exported_data/df_vouchersSpecialEdGradesDistricts_merged.csv')
# Drop columns, group by district and reset index.
df_districtVoucherSpecialEdAggregate = df_vouchersSpecialEdGradesDistricts_merged.drop(['Special ed ratio 2016'], axis=1)
df_districtVoucherSpecialEdAggregate = df_districtVoucherSpecialEdAggregate.groupby('District No.').sum()
df_districtVoucherSpecialEdAggregate = df_districtVoucherSpecialEdAggregate.reset_index()
df_districtVoucherSpecialEdAggregate
# Recalculate special ed ratio
df_districtVoucherSpecialEdAggregate['Special ed ratio 2016'] = np.vectorize(calcPerc)(df_districtVoucherSpecialEdAggregate['Special ed enrollment 2016'],df_districtVoucherSpecialEdAggregate['Total enrollment 2016'])
# df_districtVoucherSpecialEdAggregate
# Merge in public school special ed enrollment data
# Tidy and clean columns
df_publicSpecialEdEnrollment_trimmed = df_publicSpecialEdEnrollment
df_publicSpecialEdEnrollment_trimmed = df_publicSpecialEdEnrollment_trimmed.drop(['ENR_ELL_N','ENR_ELL_RATIO'], axis=1)
df_publicSpecialEdEnrollment_trimmed = df_publicSpecialEdEnrollment_trimmed.rename(columns = {'CORP':'District No.','CORP_NAME':'District Name','ENR_SPEC_ED_N':'Public special ed enrollment 2016','ENR_SPEC_ED_RATIO':'Public special ed ratio 2016','ENROLLMENT_N':'Public total enrollment 2016'})
df_publicSpecialEdEnrollment_trimmed['Public special ed ratio 2016'] = df_publicSpecialEdEnrollment_trimmed['Public special ed ratio 2016'].apply(normalizePerc)
df_publicSpecialEdEnrollment_trimmed
# Merge
df_districtVoucherSpecialEdAggregate_merged = df_publicSpecialEdEnrollment_trimmed.merge(df_districtVoucherSpecialEdAggregate,on='District No.', how='outer')
df_districtVoucherSpecialEdAggregate_merged
# df_districtVoucherSpecialEdAggregate_merged.to_csv('school_choice_data/exported_data/df_districtVoucherSpecialEdAggregate_merged.csv')
District No. | District Name | Public special ed enrollment 2016 | Public special ed ratio 2016 | Public total enrollment 2016 | Special ed enrollment 2016 | Total enrollment 2016 | Special ed ratio 2016 | |
---|---|---|---|---|---|---|---|---|
0 | 15 | Adams Central Community Schools | 132 | 10.48 | 1259 | 25.0 | 297.0 | 8.42 |
1 | 25 | North Adams Community Schools | 260 | 14.37 | 1809 | 75.0 | 1069.0 | 7.02 |
2 | 35 | South Adams Schools | 195 | 15.02 | 1298 | 114.0 | 1012.0 | 11.26 |
3 | 125 | M S D Southwest Allen County Schls | 565 | 7.86 | 7190 | 629.0 | 9483.0 | 6.63 |
4 | 225 | Northwest Allen County Schools | 887 | 12.05 | 7362 | 677.0 | 10389.0 | 6.52 |
5 | 235 | Fort Wayne Community Schools | 4514 | 15.37 | 29377 | 849.0 | 13046.0 | 6.51 |
6 | 255 | East Allen County Schools | 1113 | 11.63 | 9569 | 824.0 | 12617.0 | 6.53 |
7 | 365 | Bartholomew Con School Corp | 1431 | 12.38 | 11562 | 224.0 | 3058.0 | 7.33 |
8 | 370 | Flat Rock-Hawcreek School Corp | 124 | 14.37 | 863 | 60.0 | 1025.0 | 5.85 |
9 | 395 | Benton Community School Corp | 438 | 23.41 | 1871 | 243.0 | 2362.0 | 10.29 |
10 | 515 | Blackford County Schools | 285 | 16.69 | 1708 | NaN | NaN | NaN |
11 | 615 | Western Boone Co Com Sch Dist | 279 | 16.33 | 1709 | 51.0 | 616.0 | 8.28 |
12 | 630 | Zionsville Community Schools | 813 | 11.73 | 6931 | 301.0 | 6048.0 | 4.98 |
13 | 665 | Lebanon Community School Corp | 639 | 18.61 | 3434 | 265.0 | 3406.0 | 7.78 |
14 | 670 | Brown County School Corporation | 493 | 24.64 | 2001 | 254.0 | 3214.0 | 7.90 |
15 | 750 | Carroll Consolidated School Corp | 108 | 9.73 | 1110 | 195.0 | 1877.0 | 10.39 |
16 | 755 | Delphi Community School Corp | 154 | 10.91 | 1411 | 283.0 | 3035.0 | 9.32 |
17 | 775 | Pioneer Regional School Corp | 107 | 11.75 | 911 | NaN | NaN | NaN |
18 | 815 | Southeastern School Corp | 143 | 10.68 | 1339 | 38.0 | 267.0 | 14.23 |
19 | 875 | Logansport Community Sch Corp | 499 | 11.73 | 4254 | 1.0 | 127.0 | 0.79 |
20 | 940 | West Clark Community Schools | 670 | 14.07 | 4761 | 208.0 | 3076.0 | 6.76 |
21 | 1000 | Clarksville Community School Corp | 236 | 16.83 | 1402 | 187.0 | 2733.0 | 6.84 |
22 | 1010 | Greater Clark County Schools | 1718 | 16.29 | 10544 | 208.0 | 2983.0 | 6.97 |
23 | 1125 | Clay Community Schools | 865 | 20.46 | 4228 | 4.0 | 462.0 | 0.87 |
24 | 1150 | Clinton Central School Corporation | 108 | 11.66 | 926 | 63.0 | 745.0 | 8.46 |
25 | 1160 | Clinton Prairie School Corporation | 161 | 13.39 | 1202 | 212.0 | 2333.0 | 9.09 |
26 | 1170 | Community Schools of Frankfort | 408 | 12.78 | 3193 | 164.0 | 2531.0 | 6.48 |
27 | 1180 | Rossville Con School District | 108 | 10.88 | 993 | 179.0 | 2043.0 | 8.76 |
28 | 1300 | Crawford County Community Sch Corp | 317 | 20.39 | 1555 | 19.0 | 743.0 | 2.56 |
29 | 1315 | Barr-Reeve Community Schools Inc | 87 | 10.61 | 820 | 16.0 | 172.0 | 9.30 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
361 | 9840 | Excel Center - University Heights | 44 | 11.28 | 390 | NaN | NaN | NaN |
362 | 9845 | Xavier School of Excellence | 38 | 17.76 | 214 | NaN | NaN | NaN |
363 | 9855 | Excel Center - Noblesville | 26 | 13.13 | 198 | NaN | NaN | NaN |
364 | 9865 | Hoosier Acad Virtual Charter | 435 | 13.02 | 3342 | NaN | NaN | NaN |
365 | 9870 | Discovery Charter School | 67 | 12.93 | 518 | NaN | NaN | NaN |
366 | 9875 | Rock Creek Community Academy | 141 | 27.81 | 507 | NaN | NaN | NaN |
367 | 9880 | Career Academy High School | 68 | 25.56 | 266 | NaN | NaN | NaN |
368 | 9885 | Gary Middle College | 20 | 8.44 | 237 | NaN | NaN | NaN |
369 | 9890 | Indiana Virtual School | 222 | 7.53 | 2947 | NaN | NaN | NaN |
370 | 9895 | IN Math & Science Academy - North | 84 | 15.14 | 555 | NaN | NaN | NaN |
371 | 9900 | Excel Center - South Bend | 10 | 2.78 | 360 | NaN | NaN | NaN |
372 | 9905 | Indiana Connections Academy | 631 | 15.65 | 4032 | NaN | NaN | NaN |
373 | 9910 | Excel Center for Adult Learners | 103 | 9.94 | 1036 | NaN | NaN | NaN |
374 | 9915 | Marion Academy | 47 | 27.65 | 170 | NaN | NaN | NaN |
375 | 9920 | Damar Charter Academy | 160 | 97.56 | 164 | NaN | NaN | NaN |
376 | 9925 | Phalen Leadership Academy - IN Inc | 23 | 7.42 | 310 | NaN | NaN | NaN |
377 | 9930 | Nexus Academy of Indianapolis | 22 | 16.92 | 130 | NaN | NaN | NaN |
378 | 9935 | Vision Academy | 74 | 16.86 | 439 | NaN | NaN | NaN |
379 | 9940 | Tindley Collegiate Academy | 34 | 12.64 | 269 | NaN | NaN | NaN |
380 | 9945 | Tindley Renaissance Academy | 52 | 9.56 | 544 | NaN | NaN | NaN |
381 | 9950 | Dugger Union Community School Corp | 42 | 13.73 | 306 | NaN | NaN | NaN |
382 | 9955 | Mays Community Academy | 39 | 21.67 | 180 | NaN | NaN | NaN |
383 | 9960 | Success Academy Primary School | 83 | 17.47 | 475 | NaN | NaN | NaN |
384 | 9965 | Career Academy Middle School | 83 | 20.80 | 399 | NaN | NaN | NaN |
385 | 9970 | ACE Preparatory Academy | 2 | 5.56 | 36 | NaN | NaN | NaN |
386 | 9975 | Global Preparatory Academy | 22 | 8.18 | 269 | NaN | NaN | NaN |
387 | 9980 | Steel City Academy | 23 | 15.13 | 152 | NaN | NaN | NaN |
388 | 9985 | Seven Oaks Classical School | 20 | 12.42 | 161 | NaN | NaN | NaN |
389 | 9990 | Heritage Institute of Arts & Tech | 18 | 11.76 | 153 | NaN | NaN | NaN |
390 | 9995 | Excel Center - Shelbyville | 3 | 3.57 | 84 | NaN | NaN | NaN |
391 rows × 8 columns