Notebook

Python Party: An Investigation of the Toronto 2017-2018 Schoolchildren Immunization Coverage Dataset.¶

Val Masters¶

7 June 2019¶

In [125]:

# import pandas
import pandas as pd
# import numpy
import numpy as np
# import plotly stuff
import plotly.plotly as py
import plotly.graph_objs as go

In [126]:

def readcsv(file):
    """Load a CSV and hand it back as a NumPy array.

    Parameters
    ----------
    file : anything ``pd.read_csv`` accepts (path, URL, or file-like object).

    Returns
    -------
    numpy.ndarray
        The parsed rows (header excluded), built with ``np.array`` from the
        DataFrame so columns can be sliced positionally, e.g. ``arr[:, 1]``.
    """
    frame = pd.read_csv(file)
    return np.array(frame)

In [127]:

# returns descriptive statistics on any csv file
def filedescribe(file):
    """Return ``DataFrame.describe()`` summary statistics for a CSV.

    ``file`` is passed straight to ``pd.read_csv``, so the CSV is parsed
    (and, for a URL, fetched) on every call.
    """
    return pd.read_csv(file).describe()

In [128]:

# save each column as a variable by indexing the array:
def idcolumn(array):
    """Return column 0 of a 2-D array (every row, first column)."""
    return array[:, 0]
def schoolcolumn(array):
    """Return column 1 of a 2-D array (every row, second column)."""
    return array[:, 1]
def popcolumn(array):
    """Return column 2 of a 2-D array (every row, third column)."""
    return array[:, 2]
def DTPcoveragecolumn(array):
    """Return column 3 of a 2-D array (every row, fourth column)."""
    return array[:, 3]
def DTPexemptcolumn(array):
    """Return column 4 of a 2-D array (every row, fifth column)."""
    return array[:, 4]
def MMRcoveragecolumn(array):
    """Return column 5 of a 2-D array (every row, sixth column)."""
    return array[:, 5]
def MMRexemptcolumn(array):
    """Return column 6 of a 2-D array (every row, seventh column)."""
    return array[:, 6]

In [129]:

# all catholic schools have the string catholic in them
def find_catholic_schools(dict):
    """Keep only the entries whose key contains the substring "CATHOLIC".

    The match is case-sensitive. A new dictionary is returned; the input
    mapping is not modified and its iteration order is preserved.
    """
    # The parameter name shadows the builtin ``dict`` inside this function.
    return {
        school: figure
        for school, figure in dict.items()
        if "CATHOLIC" in school
    }

In [130]:

# terms common in alt schools. I doubt I caught them all, some like the da vinci school are named uniquely
def find_alt_schools(dict):
    """Keep the entries whose key contains any alternative-school keyword.

    A key is kept when at least one of the keywords below occurs in it as a
    case-sensitive substring. Each matching key appears once in the result,
    in the input's iteration order, with its original value.
    """
    keywords = (
        "ALTERNATIVE",
        "ARTS",
        "MUSIC",
        "COMMUNITY",
        "LIFE EXPERIENCE",
        "DA VINCI",
    )
    # The parameter name shadows the builtin ``dict`` inside this function.
    return {
        school: figure
        for school, figure in dict.items()
        if any(word in school for word in keywords)
    }

In [131]:

# collegiate and academy signal ~fancy~
def find_fancy_schools(dict):
    """Keep the entries whose key contains "COLLEGIATE" or "ACADEMY".

    Matching is a case-sensitive substring test. The result is a new
    dictionary in the input's iteration order.
    """
    keywords = ("COLLEGIATE", "ACADEMY")
    # The parameter name shadows the builtin ``dict`` inside this function.
    return {
        school: figure
        for school, figure in dict.items()
        if any(word in school for word in keywords)
    }

In [132]:

# the best way I could figure to find french schools
def find_french_schools(dict):
    """Keep the entries whose key contains any French-school keyword.

    A key is kept when at least one keyword below occurs in it as a
    case-sensitive substring. The result is a new dictionary in the input's
    iteration order.
    """
    # NOTE(review): these are plain substring tests, so a short keyword such
    # as "ESC" also matches names like "CRESCENT" -- confirm this is intended.
    keywords = (
        "ECOLE",
        "EEC",
        "ESC",
        "ELEMENTAIRE",
        "SECONDAIRE",
        "FRANCAIS",
        "ACADEMIE",
    )
    # The parameter name shadows the builtin ``dict`` inside this function.
    return {
        school: figure
        for school, figure in dict.items()
        if any(word in school for word in keywords)
    }

In [133]:

# finds public schools based on string matching
def find_public_schools(dict):
    """Keep only the entries whose key contains the substring "PUBLIC".

    The match is case-sensitive. A new dictionary is returned in the input's
    iteration order.
    """
    # The parameter name shadows the builtin ``dict`` inside this function.
    return {
        school: figure
        for school, figure in dict.items()
        if "PUBLIC" in school
    }

In [134]:

# makes a dictionary out of 2 lists
def create_dict(schools, MMRe):
    """Pair two sequences element-wise into a dictionary.

    Items of ``schools`` become keys and the items of ``MMRe`` at the same
    positions become values. Pairing stops at the shorter sequence. When a
    key repeats, the value from its last occurrence wins while the key keeps
    the position of its first occurrence.
    """
    return {name: figure for name, figure in zip(schools, MMRe)}

In [135]:

# it gets only the values from any dictionary
def getvalues(dictionary):
    """Return the dictionary's values as a new list, in iteration order."""
    return [entry for _, entry in dictionary.items()]

In [136]:

# compute and round the mean of any real or int list
def arraycompute(array):
    """Return the arithmetic mean of ``array`` rounded to one decimal place.

    Uses ``statistics.mean``, which raises ``statistics.StatisticsError``
    when ``array`` is empty.
    """
    import statistics

    return round(statistics.mean(array), 1)

In [137]:

# plotting the entire toronto schools' enrollment numbers against MMR vaccine religious exemption rates

# Load the full (modified) 2017-2018 coverage dataset straight from GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/mod_immunization-coverage-2017-2018.csv")

# One marker per row: enrolled population (x) vs. MMR religious exemption rate (y).
pop_MMRe = go.Scatter(
    x= df['enrolledPop'],
    y= df['MMRreligousExempt'],
    name = "Enrolled Population Vs. MMR Exception Rate %",
    line = dict(color = 'red'),
    mode = 'markers',
    opacity = 0.8)

# Figure title plus axis titles; both axis titles share the same font settings.
layout = go.Layout(
    title=go.layout.Title(
        text='Toronto School Enrollment vs. MMR Vaccine Religious Exemption Rates',
        xref='paper',
        x=0
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Enrolled Population',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='MMR Vaccine Religious Exemption Rate (%)',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    )
)

# `data` must exist before the figure is built. Previously the figure was
# created first, which raises NameError on a fresh kernel (or silently reuses
# a stale `data` left over from an earlier run).
data = [pop_MMRe]
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

Out[137]:

In [138]:

### PLOTTING A SINGLE SCHOOL TYPE'S ENROLLMENT AGAINST MMR VACCINE RELIGIOUS EXEMPTION RATE

# Catholic-school subset: enrollment and MMR exemption columns, read from GitHub.
catholic_csv_url = "https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/catholic.csv"
df = pd.read_csv(catholic_csv_url)

# One marker per row: enrollment (x) vs. MMR religious exemption rate (y).
cat_pop_MMRe = go.Scatter(
    name="Catholic Enrolled Population Vs. MMR Exception Rate %",
    x=df['Enrolled'],
    y=df['MMRe'],
    mode='markers',
    opacity=0.8,
    line={'color': 'red'},
)

# Both axis titles use the same font settings.
axis_title_font = {
    'family': 'Courier New, monospace',
    'size': 18,
    'color': '#7f7f7f',
}

plot_title = go.layout.Title(
    text='Toronto Catholic School Enrollment vs. MMR Vaccine Religious Exemption Rates',
    xref='paper',
    x=0,
)
x_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Enrolled Population', font=axis_title_font)
)
y_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(
        text='MMR Vaccine Religious Exemption Rate (%)',
        font=axis_title_font,
    )
)
layout = go.Layout(title=plot_title, xaxis=x_axis, yaxis=y_axis)

data = [cat_pop_MMRe]
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

Out[138]:

In [139]:

### PLOTTING ALTERNATIVE SCHOOLS' ENROLLMENT AGAINST MMR VACCINE RELIGIOUS EXEMPTION RATE

# Alternative-school subset: enrollment and MMR exemption columns, read from GitHub.
alt_csv_url = "https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/alt.csv"
df = pd.read_csv(alt_csv_url)

# One marker per row: enrollment (x) vs. MMR religious exemption rate (y).
# The variable keeps its original name `cat_pop_MMRe` even though this trace
# holds the alternative-school data.
cat_pop_MMRe = go.Scatter(
    name="Alternative School Enrolled Population Vs. MMR Exception Rate %",
    x=df['Enrolled'],
    y=df['MMRe'],
    mode='markers',
    opacity=0.8,
    line={'color': 'red'},
)

# Both axis titles use the same font settings.
axis_title_font = {
    'family': 'Courier New, monospace',
    'size': 18,
    'color': '#7f7f7f',
}

plot_title = go.layout.Title(
    text='Toronto Alternative School Enrollment vs. MMR Vaccine Religious Exemption Rates',
    xref='paper',
    x=0,
)
x_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Enrolled Population', font=axis_title_font)
)
y_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(
        text='MMR Vaccine Religious Exemption Rate (%)',
        font=axis_title_font,
    )
)
layout = go.Layout(title=plot_title, xaxis=x_axis, yaxis=y_axis)

data = [cat_pop_MMRe]
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

Out[139]:

In [140]:

def savetocsv(array, filename='MMRe.csv', fmt="%d"):
    """Write ``array`` to a comma-delimited text file via ``np.savetxt``.

    Parameters
    ----------
    array : 1-D or 2-D array-like
        The values to write.
    filename : str, optional
        Output path. Defaults to ``'MMRe.csv'``; a relative path is resolved
        against the current working directory.
    fmt : str, optional
        ``np.savetxt`` format string. The default ``"%d"`` writes integers,
        so fractional values are truncated; pass e.g. ``"%s"`` or ``"%.1f"``
        to keep decimals.

    Returns
    -------
    None
    """
    np.savetxt(filename, array, fmt=fmt, delimiter=",")

In [142]:

# where the magic happens
def main():
    """Run the whole analysis: describe the data, group schools, report means.

    Steps, in order:

    1. Print ``filedescribe`` output for the modified 2017-2018 coverage CSV.
    2. Load the same CSV with ``readcsv`` and slice out its columns.
    3. Build school-name -> MMR-exemption and school-name -> enrollment
       dictionaries, then filter each one by school type (keyword matching
       on the school name).
    4. Write ``catholic.csv`` and ``alt.csv`` (enrollment, exemption rate)
       into the current working directory.
    5. Print the rounded mean exemption rate and the rounded mean enrollment
       for each school type.

    Takes no arguments and returns None. The CSV is read from a GitHub URL,
    so network access is required.
    """
    
    #describe data
    print(filedescribe("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/mod_immunization-coverage-2017-2018.csv"))
    
    # an array with all data from toronto immunization data csv
    # (I modified this file slightly, deleting and replacing
    # all characters my computer didn't recognize. 
    # I also sorted the data to make it more understandable.)
    # Note: this is the second read of the same URL (filedescribe read it above).
    savedarray = readcsv("https://raw.githubusercontent.com/valhella/torontoimmunization/master/data/mod_immunization-coverage-2017-2018.csv")
   
    # all columns saved as their own array variables
    # (ids, DTPc, DTPe and MMRc are not used again in this function)
    ids = idcolumn(savedarray)
    schools = schoolcolumn(savedarray)
    pop = popcolumn(savedarray)
    DTPc = DTPcoveragecolumn(savedarray)
    DTPe = DTPexemptcolumn(savedarray)
    MMRc = MMRcoveragecolumn(savedarray)
    MMRe = MMRexemptcolumn(savedarray)
    
    
    #----------------------------------------
    # a dictionary with all school names as keys and all MMR exemption rates as values
    # (create_dict keeps only the last value for a repeated school name)
    school_MMRe_dict = create_dict(schools, MMRe)
    
    # dictionaries for each school type as keys with their associated MMRe rates as values
    catholic_schools = find_catholic_schools(school_MMRe_dict)
    alt_schools = find_alt_schools(school_MMRe_dict)
    fancy_schools = find_fancy_schools(school_MMRe_dict)
    french_schools= find_french_schools(school_MMRe_dict)
    public_schools = find_public_schools(school_MMRe_dict)
    
    # a dictionary with all school names as keys and enrollment numbers as values
    school_enrollment_dict = create_dict(schools, pop)

    # dictionaries for each school type as keys and associated enrollment numbers as values
    catholic_schools_enroll_dict = find_catholic_schools(school_enrollment_dict)
    alt_schools_enroll_dict = find_alt_schools(school_enrollment_dict)
    fancy_schools_enroll_dict = find_fancy_schools(school_enrollment_dict)
    french_schools_enroll_dict = find_french_schools(school_enrollment_dict)
    public_schools_enroll_dict = find_public_schools(school_enrollment_dict)
    
    #----------------------------
    
    # call function that takes values from a dictionary and makes them into a list
    
    # MMR exemption rates for each school type:
    catholic_values = getvalues(catholic_schools) 
    alt_values = getvalues(alt_schools)
    fancy_values = getvalues(fancy_schools)
    french_values = getvalues(french_schools)
    public_values = getvalues(public_schools)
    
    # Enrollment numbers for each school type:
    catholic_enroll  = getvalues(catholic_schools_enroll_dict)
    alt_enroll = getvalues(alt_schools_enroll_dict)
    fancy_enroll = getvalues(fancy_schools_enroll_dict)
    french_enroll = getvalues(french_schools_enroll_dict)
    public_enroll = getvalues(public_schools_enroll_dict)
    
    # create files by combining selected above lists to be used as a dataframe for pandas/plotly
    # must then add headings manually and upload to github, get URL and use to create plotly dataframe
    # The enrollment and rate lists line up row-for-row because both dictionaries
    # were built from the same `schools` keys and filtered by the same name test.
    np.savetxt("catholic.csv", np.column_stack((catholic_enroll, catholic_values)), delimiter=",", fmt='%s')
    np.savetxt("alt.csv", np.column_stack((alt_enroll, alt_values)), delimiter=",", fmt='%s')
       
    
    #--------------------------------
    
    # call function that computes mean values from list
    # (arraycompute rounds each mean to one decimal place)
    
    # Mean MMR exemption rate for each school type:
    mean_catholic = arraycompute(catholic_values) 
    mean_alt = arraycompute(alt_values)
    mean_fancy = arraycompute(fancy_values)
    mean_french = arraycompute(french_values)
    mean_public = arraycompute(public_values)
    
    # print the mean MMR exemption rates for each school type
    print("Catholic schools' mean MMR religious exemption rate %:", mean_catholic)
    print("Alternative schools' mean MMR religious exemption rate %:", mean_alt)
    print("Collegiate institutes and academy schools' mean MMR religious exemption rate %:", mean_fancy)
    print("French schools' mean MMR religious exemption rate %:", mean_french)
    print("Public schools' mean MMR religious exemption rate %:", mean_public)
    
    # Mean enrollment numbers for each school type:
    mean_enroll_catholic = arraycompute(catholic_enroll)
    mean_enroll_alt = arraycompute(alt_enroll)
    mean_enroll_fancy = arraycompute(fancy_enroll)
    mean_enroll_french = arraycompute(french_enroll)
    mean_enroll_public = arraycompute(public_enroll)

    # print the mean enrollment values for each school type
    print("Catholic schools' mean enrollment:", mean_enroll_catholic)
    print("Alternative schools' mean enrollment:",mean_enroll_alt)
    print("Collegiate institutes and academy schools' mean enrollment:", mean_enroll_fancy)
    print("French schools' mean enrollment:", mean_enroll_french)
    print("Public schools' mean enrollment:", mean_enroll_public)
    
# Run the analysis when the cell executes.
main()

               id  enrolledPop  DTPcoverage  DTPreligousExempt  MMRcoverage  \
count  808.000000   808.000000   808.000000         808.000000   808.000000   
mean   404.500000   300.456683    89.969678           2.114851    92.704950   
std    233.393802   294.409468     7.297535           3.041579     4.967658   
min      1.000000    15.000000    26.700000           0.000000    51.700000   
25%    202.750000   120.750000    87.800000           0.600000    90.975000   
50%    404.500000   199.000000    91.000000           1.400000    93.600000   
75%    606.250000   349.000000    94.325000           2.725000    95.800000   
max    808.000000  1887.000000   100.000000          44.100000   100.000000   

       MMRreligousExempt  
count         808.000000  
mean            2.103094  
std             3.044536  
min             0.000000  
25%             0.575000  
50%             1.400000  
75%             2.725000  
max            44.100000  
Catholic schools' mean MMR religious exemption rate %: 1.3
Alternative schools' mean MMR religious exemption rate %: 6.9
Collegiate institutes and academy schools' mean MMR religious exemption rate %: 1.9
French schools' mean MMR religious exemption rate %: 2.2
Public schools' mean MMR religious exemption rate %: 1.8
Catholic schools' mean enrollment: 302.7
Alternative schools' mean enrollment: 175.2
Collegiate institutes and academy schools' mean enrollment: 804.3
French schools' mean enrollment: 132.6
Public schools' mean enrollment: 208.4

In [ ]: