#!/usr/bin/env python # coding: utf-8 #
Using the public data available at NYC OpenData, I downloaded two data sets, one that only had the DBN codes associated with the schools along with the statistics of college enrollment, and another which was a directory of high schools (name, street address, city..etc). I combined what I needed from the two and then converted each address into coordinates using Google's Geocoding API. I then mapped each school into a city map based on their coordinates and color coded them based on their college enrollment, thus painting an image out of the data.
#At this point, the data set is almost how I want it to be. I then began converting the school's full address into coordinates of latitude and longitude to add into the empty columns seen above.
# In[325]: # 1. Add coords into new columns # first run will give us some with coords and some without due to being missed by the requests # then we will run a similar func until all have coords for idx, row in fullProfilesv2.iterrows(): try: # sleep is to avoid sending too many requests quickly schoolAddress = row["Primary_Address_Line_1"] + ", " + row["City"] \ + ", " + row["State_Code"] schoolAddressEdit = schoolAddress.replace(" ", "+") r = requests.get("https://maps.googleapis.com/maps/api/geocode/json?address={}key=AIzaSyBU828rT3AJLuU4LtZksyB2iWH63oBLCsU".format(schoolAddressEdit)) jsonInfo = r.json() locationDict = jsonInfo['results'][0]['geometry']['location'] row["Latitude"] = locationDict['lat'] row["Longitude"] = locationDict['lng'] sleep(2) except IndexError: row["Latitude"] = "missed" row["Longitude"] = "missed" # In[387]: # runs until all have coordinates, then we can finally map by retrieving them # have to do it this way to meet API's limit (cus im not paying to get more requests, im broke) for idx, row in fullProfilesv2.iterrows(): try: if row["Latitude"] == "missed": schoolAddress = row["Primary_Address_Line_1"] + ", " + row["City"] \ + ", " + row["State_Code"] schoolAddressEdit = schoolAddress.replace(" ", "+") r = requests.get("https://maps.googleapis.com/maps/api/geocode/json?address={}key=AIzaSyBU828rT3AJLuU4LtZksyB2iWH63oBLCsU".format(schoolAddressEdit)) jsonInfo = r.json() locationDict = jsonInfo['results'][0]['geometry']['location'] row["Latitude"] = locationDict['lat'] row["Longitude"] = locationDict['lng'] sleep(2) except IndexError: pass print(fullProfilesv2.Latitude[fullProfilesv2.Latitude == "missed"].count()) # In[388]: print(fullProfilesv2.Latitude[fullProfilesv2.Latitude == "missed"].count()) print(any(fullProfilesv2.Latitude == "missed")) fullProfilesv2 #Here, I had gotten all of the necessary parts that I needed. Thus, I began to create a map through folium, a nice library for interactive maps. With each cell in my data set, I evaluated its statistic using a criteria I made, and placed it on the map with a specific color.
# In[392]: fullProfilesv2.to_csv("schoolCompleteData.csv", encoding='utf-8') # In[401]: city_map = folium.Map(location=[40.738, -73.98], zoom_start=11, tiles="CartoDB dark_matter") # this fixes our problem of the requests missing schools thus not having all coordinates to map at once # maps each school onto map with a color based on their stat boroughsShown = {"BX": 0, "BK": 0, "Manhattan": 0, "Queens": 0, "SI": 0} boroughsShown["BX"] = fullProfilesv2.Boro[fullProfilesv2.Boro == "X"].count() boroughsShown["BK"] = fullProfilesv2.Boro[fullProfilesv2.Boro == "K"].count() boroughsShown["SI"] = fullProfilesv2.Boro[fullProfilesv2.Boro == "R"].count() boroughsShown["Manhattan"] = fullProfilesv2.Boro[fullProfilesv2.Boro == "M"].count() boroughsShown["Queens"] = fullProfilesv2.Boro[fullProfilesv2.Boro == "Q"].count() for boro, count in boroughsShown.items(): print("{} represented by {} schools.".format(boro, count)) for idx, row in fullProfilesv2.iterrows(): if row["Latitude"] != "missed": stat = float(row["collegeStat"].replace("%", "")) if stat >= 90: color = "#0000A0" # dark blue elif stat >= 75 and stat < 90: color = "#736AFF" # light blue elif stat >= 65 and stat < 75: color = "FFFFFF" # white elif stat >= 50 and stat < 65: color = "#FF0099" # pink elif stat < 50: color = "#FF0000" # red # popup_text = "{}