# import pandas
import pandas as pd
# import numpy
import numpy as np
# import plotly stuff
import plotly.plotly as py
import plotly.graph_objs as go
def readcsv(file):
    """Load a CSV with pandas and return its rows as a NumPy array.

    Args:
        file: Anything ``pd.read_csv`` accepts (path, URL, or file-like object).

    Returns:
        A 2-D ``numpy`` array built from the parsed DataFrame, so columns can
        be selected positionally (e.g. ``result[:, 0]``). The header row is
        consumed by pandas and is not part of the array.
    """
    return np.array(pd.read_csv(file))
# returns descriptive statistics on any csv file
def filedescribe(file):
    """Return pandas' descriptive statistics for a CSV file.

    Args:
        file: Anything ``pd.read_csv`` accepts (path, URL, or file-like object).

    Returns:
        The DataFrame produced by calling ``describe()`` on the parsed CSV.
    """
    return pd.read_csv(file).describe()
# save each column as a variable by indexing the array:
def idcolumn(array):
    """Return column 0 of a 2-D array.

    Presumably the record-id column of the immunization CSV; this function
    only slices and does not inspect the contents.
    """
    return array[:, 0]
def schoolcolumn(array):
    """Return column 1 of a 2-D array.

    main() uses the result as the school names (dictionary keys).
    """
    return array[:, 1]
def popcolumn(array):
    """Return column 2 of a 2-D array.

    main() uses the result as the enrollment numbers per school.
    """
    return array[:, 2]
def DTPcoveragecolumn(array):
    """Return column 3 of a 2-D array.

    Presumably the DTP coverage rates -- verify against the CSV layout.
    """
    return array[:, 3]
def DTPexemptcolumn(array):
    """Return column 4 of a 2-D array.

    Presumably the DTP religious-exemption rates -- verify against the CSV
    layout.
    """
    return array[:, 4]
def MMRcoveragecolumn(array):
    """Return column 5 of a 2-D array.

    Presumably the MMR coverage rates -- verify against the CSV layout.
    """
    return array[:, 5]
def MMRexemptcolumn(array):
    """Return column 6 of a 2-D array.

    main() uses the result as the MMR exemption rate per school.
    """
    return array[:, 6]
# all catholic schools have the string catholic in them
def find_catholic_schools(dict):
    """Select the entries whose key contains the substring "CATHOLIC".

    Args:
        dict: Mapping keyed by school name (the match is case-sensitive).

    Returns:
        A new dictionary with the matching key/value pairs, in the input's
        iteration order.
    """
    return {name: value for name, value in dict.items() if "CATHOLIC" in name}
# terms common in alt schools. I doubt I caught them all, some like the da vinci school are named uniquely
def find_alt_schools(dict):
    """Select the entries whose key contains any alternative-school keyword.

    Args:
        dict: Mapping keyed by school name (the match is case-sensitive).

    Returns:
        A new dictionary with every key/value pair whose key contains at
        least one of the keywords below, in the input's iteration order.
    """
    keywords = (
        "ALTERNATIVE",
        "ARTS",
        "MUSIC",
        "COMMUNITY",
        "LIFE EXPERIENCE",
        "DA VINCI",
    )
    return {
        name: value
        for name, value in dict.items()
        if any(word in name for word in keywords)
    }
# collegiate and academy signal ~fancy~
def find_fancy_schools(dict):
    """Select the entries whose key contains "COLLEGIATE" or "ACADEMY".

    Args:
        dict: Mapping keyed by school name (the match is case-sensitive).

    Returns:
        A new dictionary with the matching key/value pairs, in the input's
        iteration order.
    """
    keywords = ("COLLEGIATE", "ACADEMY")
    return {
        name: value
        for name, value in dict.items()
        if any(word in name for word in keywords)
    }
# the best way I could figure to find french schools
def find_french_schools(dict):
    """Select the entries whose key looks like a French school name.

    A key matches when it contains one of the French words ``ECOLE``,
    ``ELEMENTAIRE``, ``SECONDAIRE``, ``FRANCAIS`` or ``ACADEMIE`` anywhere,
    or when it contains the abbreviation ``EEC`` or ``ESC`` as a whole word.

    The abbreviations are matched on word boundaries because, as plain
    substrings, they also occur inside English names ("CRESCENT" contains
    "ESC", "BEECHES" contains "EEC") and would misclassify those schools.

    Args:
        dict: Mapping keyed by school name (the match is case-sensitive).

    Returns:
        A new dictionary with the matching key/value pairs, in the input's
        iteration order.
    """
    # Local import so the function does not depend on the file's import block.
    import re

    french_words = ("ECOLE", "ELEMENTAIRE", "SECONDAIRE", "FRANCAIS", "ACADEMIE")
    abbreviation = re.compile(r"\b(?:EEC|ESC)\b")

    french_dict = {}
    for key, value in dict.items():
        if any(word in key for word in french_words) or abbreviation.search(key):
            french_dict[key] = value
    return french_dict
# finds public schools based on string matching
def find_public_schools(dict):
    """Select the entries whose key contains the substring "PUBLIC".

    Args:
        dict: Mapping keyed by school name (the match is case-sensitive).

    Returns:
        A new dictionary with the matching key/value pairs, in the input's
        iteration order.
    """
    return {name: value for name, value in dict.items() if "PUBLIC" in name}
# makes a dictionary out of 2 lists
def create_dict(schools, MMRe):
    """Pair two sequences element-wise into a dictionary.

    Args:
        schools: Iterable of keys.
        MMRe: Iterable of values, matched to ``schools`` by position.

    Returns:
        A dictionary mapping each key to its value. Pairing stops at the
        shorter input, and a repeated key keeps the last value seen.
    """
    return {school: value for school, value in zip(schools, MMRe)}
# it gets only the values from any dictionary
def getvalues(dictionary):
    """Return the values of a dictionary as a list, in iteration order.

    Args:
        dictionary: Any object exposing ``items()`` that yields key/value pairs.

    Returns:
        A new list holding only the values.
    """
    return [value for _, value in dictionary.items()]
# compute and round the mean of any real or int list
def arraycompute(array):
    """Return the arithmetic mean of ``array`` rounded to one decimal place.

    Args:
        array: Non-empty iterable of numbers.

    Returns:
        ``round(statistics.mean(array), 1)``.

    Raises:
        statistics.StatisticsError: If ``array`` is empty.
    """
    import statistics

    return round(statistics.mean(array), 1)
# plotting the entire toronto schools' enrollment numbers against MMR vaccine religious exemption rates
# Load the full Toronto immunization data set straight from GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/mod_immunization-coverage-2017-2018.csv")

# One scatter trace: enrollment on x, MMR religious-exemption rate on y.
pop_MMRe = go.Scatter(
    x=df['enrolledPop'],
    y=df['MMRreligousExempt'],
    name="Enrolled Population Vs. MMR Exception Rate %",
    line=dict(color='red'),
    mode='markers',
    opacity=0.8)

layout = go.Layout(
    title=go.layout.Title(
        text='Toronto School Enrollment vs. MMR Vaccine Religious Exemption Rates',
        xref='paper',
        x=0
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Enrolled Population',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='MMR Vaccine Religious Exemption Rate (%)',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    )
)

# `data` must exist before the figure is built; the original created the
# figure first, which raised NameError on the undefined name `data`.
data = [pop_MMRe]
fig = go.Figure(data=data, layout=layout)

# NOTE(review): `py` is plotly.plotly; newer plotly releases moved that module
# to the separate chart_studio package -- confirm against the installed version.
py.iplot(fig)
### PLOTTING CATHOLIC SCHOOLS' ENROLLMENT AGAINST MMR VACCINE RELIGIOUS EXEMPTION RATE
# Catholic-school subset: one row per school, read from the CSV on GitHub
# (columns 'Enrolled' and 'MMRe').
df = pd.read_csv("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/catholic.csv")

# Scatter trace: enrollment on x, MMR religious-exemption rate on y.
cat_pop_MMRe = go.Scatter(
    name="Catholic Enrolled Population Vs. MMR Exception Rate %",
    mode='markers',
    opacity=0.8,
    line={'color': 'red'},
    x=df['Enrolled'],
    y=df['MMRe'],
)

# Figure title plus axis titles; both axis titles use the same grey monospace font.
layout = go.Layout(
    title=go.layout.Title(
        text='Toronto Catholic School Enrollment vs. MMR Vaccine Religious Exemption Rates',
        xref='paper',
        x=0,
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Enrolled Population',
            font={'family': 'Courier New, monospace', 'size': 18, 'color': '#7f7f7f'},
        ),
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='MMR Vaccine Religious Exemption Rate (%)',
            font={'family': 'Courier New, monospace', 'size': 18, 'color': '#7f7f7f'},
        ),
    ),
)

# Assemble the figure from the single trace and render it.
data = [cat_pop_MMRe]
fig = go.Figure(layout=layout, data=data)
py.iplot(fig)
### PLOTTING ALTERNATIVE SCHOOLS' ENROLLMENT AGAINST MMR VACCINE RELIGIOUS EXEMPTION RATE
# Alternative-school subset: one row per school, read from the CSV on GitHub
# (columns 'Enrolled' and 'MMRe').
df = pd.read_csv("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/alt.csv")

# Scatter trace: enrollment on x, MMR religious-exemption rate on y.
# NOTE: the variable keeps the name `cat_pop_MMRe` even though it now holds
# the alternative-school trace.
cat_pop_MMRe = go.Scatter(
    name="Alternative School Enrolled Population Vs. MMR Exception Rate %",
    mode='markers',
    opacity=0.8,
    line={'color': 'red'},
    x=df['Enrolled'],
    y=df['MMRe'],
)

# Figure title plus axis titles; both axis titles use the same grey monospace font.
layout = go.Layout(
    title=go.layout.Title(
        text='Toronto Alternative School Enrollment vs. MMR Vaccine Religious Exemption Rates',
        xref='paper',
        x=0,
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Enrolled Population',
            font={'family': 'Courier New, monospace', 'size': 18, 'color': '#7f7f7f'},
        ),
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='MMR Vaccine Religious Exemption Rate (%)',
            font={'family': 'Courier New, monospace', 'size': 18, 'color': '#7f7f7f'},
        ),
    ),
)

# Assemble the figure from the single trace and render it.
data = [cat_pop_MMRe]
fig = go.Figure(layout=layout, data=data)
py.iplot(fig)
# saves a list to a txt/csv file on your computer in the open directory
def savetocsv(array):
    """Write ``array`` to ``MMRe.csv`` in the current working directory.

    Values are formatted as integers (``%d``) and separated by commas; an
    existing file of that name is overwritten.

    Args:
        array: 1-D or 2-D array-like accepted by ``np.savetxt``.
    """
    # Alternative noted in the original: pd.DataFrame(array).to_csv("filepath.csv")
    output_name = 'MMRe.csv'
    np.savetxt(output_name, array, delimiter=",", fmt="%d")
# where the magic happens
def main():
    """Run the analysis: describe the data, group schools, save CSVs, print means.

    Steps:
      1. Print descriptive statistics for the Toronto immunization CSV.
      2. Build two dictionaries keyed by school name: one holding MMR
         exemption rates, one holding enrollment numbers.
      3. Filter both dictionaries by school type (Catholic, alternative,
         collegiate/academy, French, public).
      4. Write ``catholic.csv`` and ``alt.csv`` (enrollment, MMR exemption
         rate) to the current directory. The files have no header row;
         headings must be added manually before they are used for plotting.
      5. Print the mean MMR exemption rate, then the mean enrollment, for
         each school type.

    The source CSV was slightly modified by the author (unrecognized
    characters replaced, rows sorted).
    """
    data_url = "https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/mod_immunization-coverage-2017-2018.csv"

    # describe data
    print(filedescribe(data_url))

    # Only the columns this analysis actually uses are extracted.
    savedarray = readcsv(data_url)
    schools = schoolcolumn(savedarray)
    pop = popcolumn(savedarray)
    MMRe = MMRexemptcolumn(savedarray)

    # school name -> MMR exemption rate, and school name -> enrollment
    school_MMRe_dict = create_dict(schools, MMRe)
    school_enrollment_dict = create_dict(schools, pop)

    # (short key, label used in the printed report, filter function)
    school_types = (
        ("catholic", "Catholic schools'", find_catholic_schools),
        ("alt", "Alternative schools'", find_alt_schools),
        ("fancy", "Collegiate institutes and academy schools'", find_fancy_schools),
        ("french", "French schools'", find_french_schools),
        ("public", "Public schools'", find_public_schools),
    )

    # Per school type: list of MMR exemption rates and list of enrollments.
    # Both lists come from dictionaries built over the same school names, so
    # they line up element by element.
    mmre_values = {
        key: getvalues(finder(school_MMRe_dict)) for key, _, finder in school_types
    }
    enroll_values = {
        key: getvalues(finder(school_enrollment_dict)) for key, _, finder in school_types
    }

    # create files to be used as a dataframe for pandas/plotly
    # must then add headings manually and upload to github, get URL and use to create plotly dataframe
    for key in ("catholic", "alt"):
        np.savetxt(
            key + ".csv",
            np.column_stack((enroll_values[key], mmre_values[key])),
            delimiter=",",
            fmt='%s',
        )

    # Compute every mean before printing so a failure (e.g. an empty group)
    # does not leave a half-printed report section.
    mean_rates = [(label, arraycompute(mmre_values[key])) for key, label, _ in school_types]
    for label, value in mean_rates:
        print(label + " mean MMR religious exemption rate %:", value)

    mean_enrollments = [(label, arraycompute(enroll_values[key])) for key, label, _ in school_types]
    for label, value in mean_enrollments:
        print(label + " mean enrollment:", value)
# Entry point: runs the whole analysis unconditionally whenever this file is executed.
main()
id enrolledPop DTPcoverage DTPreligousExempt MMRcoverage \ count 808.000000 808.000000 808.000000 808.000000 808.000000 mean 404.500000 300.456683 89.969678 2.114851 92.704950 std 233.393802 294.409468 7.297535 3.041579 4.967658 min 1.000000 15.000000 26.700000 0.000000 51.700000 25% 202.750000 120.750000 87.800000 0.600000 90.975000 50% 404.500000 199.000000 91.000000 1.400000 93.600000 75% 606.250000 349.000000 94.325000 2.725000 95.800000 max 808.000000 1887.000000 100.000000 44.100000 100.000000 MMRreligousExempt count 808.000000 mean 2.103094 std 3.044536 min 0.000000 25% 0.575000 50% 1.400000 75% 2.725000 max 44.100000 Catholic schools' mean MMR religious exemption rate %: 1.3 Alternative schools' mean MMR religious exemption rate %: 6.9 Collegiate institutes and academy schools' mean MMR religious exemption rate %: 1.9 French schools' mean MMR religious exemption rate %: 2.2 Public schools' mean MMR religious exemption rate %: 1.8 Catholic schools' mean enrollment: 302.7 Alternative schools' mean enrollment: 175.2 Collegiate institutes and academy schools' mean enrollment: 804.3 French schools' mean enrollment: 132.6 Public schools' mean enrollment: 208.4