import pandas as pd
# import CCVI & census data csv
df_raw = pd.read_csv('https://storage.googleapis.com/ccvi_county/ccvi_county.csv') #, index_col='[index]')
df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Alabama | AL | Autauga | 1001 | 0.357 | 0.566 | 0.592 | 0.314 | 0.833 | 0.935 | 0.777 |
1 | Alabama | AL | Baldwin | 1003 | 0.240 | 0.244 | 0.432 | 0.335 | 0.380 | 0.768 | 0.362 |
2 | Alabama | AL | Barbour | 1005 | 0.973 | 0.839 | 0.897 | 0.640 | 0.778 | 0.936 | 0.991 |
3 | Alabama | AL | Bibb | 1007 | 0.746 | 0.249 | 0.443 | 0.743 | 0.726 | 0.927 | 0.775 |
4 | Alabama | AL | Blount | 1009 | 0.635 | 0.432 | 0.583 | 0.322 | 0.630 | 0.509 | 0.612 |
# filter out California data
# surpress SettingWithCopyError message
pd.options.mode.chained_assignment = None
ca_df_raw = df_raw.loc[df_raw['State Abbreviation'] == "CA"]
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | |
---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 |
# check if this is has the correct number of counties. California has 58 counties
len(ca_df_raw)
58
# add covid information for each county. CC stands for confirmed cases
df_covid_cc_US = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
df_covid_cc_CA = df_covid_cc_US.loc[df_covid_cc_US['Province_State'] == "California"]
# check if we have 58 counties
#len(df_covid_cc_CA)
#60
df_covid_cc_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | 2/19/20 | ... | 1/12/21 | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda, California, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 62046 | 62943 | 63866 | 64760 | 65679 | 68016 | 67375 | 67952 | 68649 | 69107 | 69554 | 69693 | 70334 | 70823 | 71298 | 72024 | 72597 | 73115 | 73542 | 73771 | 74087 | 74661 | 74959 | 75175 | 78224 | 75737 | 76170 | 76648 | 77109 | 77216 | 77632 | 77926 | 78285 | 78356 | 78714 | 78883 | 79016 | 79092 | 79297 | 79471 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine, California, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 74 | 74 | 74 | 74 | 74 | 74 | 74 | 76 | 78 | 78 | 72 | 78 | 78 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 80 | 80 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador, California, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2867 | 2897 | 2921 | 2951 | 2965 | 2986 | 3001 | 3016 | 3024 | 3036 | 3038 | 3042 | 3049 | 3067 | 3074 | 3113 | 3116 | 3147 | 3156 | 3199 | 3227 | 3234 | 3275 | 3281 | 3288 | 3299 | 3306 | 3343 | 3344 | 3347 | 3348 | 3368 | 3381 | 3387 | 3398 | 3409 | 3413 | 3415 | 3417 | 3427 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte, California, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8795 | 8891 | 9013 | 9127 | 9233 | 9359 | 9411 | 9474 | 9528 | 9582 | 9643 | 9711 | 9757 | 9820 | 9865 | 9943 | 9988 | 10037 | 10122 | 10177 | 10215 | 10263 | 10307 | 10355 | 10386 | 10421 | 10480 | 10513 | 10524 | 10563 | 10594 | 10624 | 10655 | 10699 | 10717 | 10723 | 10748 | 10770 | 10791 | 10823 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras, California, US | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1460 | 1475 | 1493 | 1513 | 1538 | 1596 | 1608 | 1617 | 1621 | 1637 | 1647 | 1672 | 1695 | 1704 | 1713 | 1723 | 1737 | 1747 | 1757 | 1763 | 1778 | 1785 | 1786 | 1799 | 1808 | 1818 | 1832 | 1837 | 1840 | 1847 | 1850 | 1856 | 1860 | 1863 | 1866 | 1871 | 1899 | 1896 | 1898 | 1898 |
5 rows × 407 columns
# need to clean the data and find the 2 extra records in df_covid_cc_CA
# first grab all the county names in combined_key and remove the California, US to get the County names.
df_covid_cc_CA['Combined_Key'] = df_covid_cc_CA['Combined_Key'].apply(lambda x: x.split(",")[0])
df_covid_cc_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | 2/19/20 | ... | 1/12/21 | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 62046 | 62943 | 63866 | 64760 | 65679 | 68016 | 67375 | 67952 | 68649 | 69107 | 69554 | 69693 | 70334 | 70823 | 71298 | 72024 | 72597 | 73115 | 73542 | 73771 | 74087 | 74661 | 74959 | 75175 | 78224 | 75737 | 76170 | 76648 | 77109 | 77216 | 77632 | 77926 | 78285 | 78356 | 78714 | 78883 | 79016 | 79092 | 79297 | 79471 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 74 | 74 | 74 | 74 | 74 | 74 | 74 | 76 | 78 | 78 | 72 | 78 | 78 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 80 | 80 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2867 | 2897 | 2921 | 2951 | 2965 | 2986 | 3001 | 3016 | 3024 | 3036 | 3038 | 3042 | 3049 | 3067 | 3074 | 3113 | 3116 | 3147 | 3156 | 3199 | 3227 | 3234 | 3275 | 3281 | 3288 | 3299 | 3306 | 3343 | 3344 | 3347 | 3348 | 3368 | 3381 | 3387 | 3398 | 3409 | 3413 | 3415 | 3417 | 3427 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8795 | 8891 | 9013 | 9127 | 9233 | 9359 | 9411 | 9474 | 9528 | 9582 | 9643 | 9711 | 9757 | 9820 | 9865 | 9943 | 9988 | 10037 | 10122 | 10177 | 10215 | 10263 | 10307 | 10355 | 10386 | 10421 | 10480 | 10513 | 10524 | 10563 | 10594 | 10624 | 10655 | 10699 | 10717 | 10723 | 10748 | 10770 | 10791 | 10823 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1460 | 1475 | 1493 | 1513 | 1538 | 1596 | 1608 | 1617 | 1621 | 1637 | 1647 | 1672 | 1695 | 1704 | 1713 | 1723 | 1737 | 1747 | 1757 | 1763 | 1778 | 1785 | 1786 | 1799 | 1808 | 1818 | 1832 | 1837 | 1840 | 1847 | 1850 | 1856 | 1860 | 1863 | 1866 | 1871 | 1899 | 1896 | 1898 | 1898 |
5 rows × 407 columns
print(len(df_covid_cc_CA.Combined_Key.values))
print(len(ca_df_raw.County.values))
60 58
lst1 = df_covid_cc_CA.Combined_Key.values
lst2 = ca_df_raw.County.values
new_list = [
list(set(lst1).difference(lst2))
]
new_list
# We see covid confirmed cases US has two rows that we have to remove 'Out of CA', 'Unassigned'
[['Out of CA', 'Unassigned']]
# extract rows 'Out of CA', 'Unassigned'
df_covid_cc_CA.drop(df_covid_cc_CA.loc[df_covid_cc_CA['Combined_Key']=="Out of CA"].index, inplace=True)
df_covid_cc_CA.drop(df_covid_cc_CA.loc[df_covid_cc_CA['Combined_Key']=="Unassigned"].index, inplace=True)
len(df_covid_cc_CA)
58
# Check if we have identical Counties
import numpy as np
check_counties = (np.array(ca_df_raw.County.values) == np.array(df_covid_cc_CA.Combined_Key.values))
check_counties
array([ True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True])
# creata total_cases row
df_covid_cc_CA['total_cases']= df_covid_cc_CA[df_covid_cc_CA.columns[-1]]
df_covid_cc_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | 2/19/20 | ... | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | total_cases | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 62943 | 63866 | 64760 | 65679 | 68016 | 67375 | 67952 | 68649 | 69107 | 69554 | 69693 | 70334 | 70823 | 71298 | 72024 | 72597 | 73115 | 73542 | 73771 | 74087 | 74661 | 74959 | 75175 | 78224 | 75737 | 76170 | 76648 | 77109 | 77216 | 77632 | 77926 | 78285 | 78356 | 78714 | 78883 | 79016 | 79092 | 79297 | 79471 | 79471 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 73 | 74 | 74 | 74 | 74 | 74 | 74 | 74 | 76 | 78 | 78 | 72 | 78 | 78 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 79 | 80 | 80 | 80 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2897 | 2921 | 2951 | 2965 | 2986 | 3001 | 3016 | 3024 | 3036 | 3038 | 3042 | 3049 | 3067 | 3074 | 3113 | 3116 | 3147 | 3156 | 3199 | 3227 | 3234 | 3275 | 3281 | 3288 | 3299 | 3306 | 3343 | 3344 | 3347 | 3348 | 3368 | 3381 | 3387 | 3398 | 3409 | 3413 | 3415 | 3417 | 3427 | 3427 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8891 | 9013 | 9127 | 9233 | 9359 | 9411 | 9474 | 9528 | 9582 | 9643 | 9711 | 9757 | 9820 | 9865 | 9943 | 9988 | 10037 | 10122 | 10177 | 10215 | 10263 | 10307 | 10355 | 10386 | 10421 | 10480 | 10513 | 10524 | 10563 | 10594 | 10624 | 10655 | 10699 | 10717 | 10723 | 10748 | 10770 | 10791 | 10823 | 10823 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1475 | 1493 | 1513 | 1538 | 1596 | 1608 | 1617 | 1621 | 1637 | 1647 | 1672 | 1695 | 1704 | 1713 | 1723 | 1737 | 1747 | 1757 | 1763 | 1778 | 1785 | 1786 | 1799 | 1808 | 1818 | 1832 | 1837 | 1840 | 1847 | 1850 | 1856 | 1860 | 1863 | 1866 | 1871 | 1899 | 1896 | 1898 | 1898 | 1898 |
5 rows × 408 columns
# Check total confirmed total cases in CA
df_covid_cc_CA["total_cases"].sum()
3527338
# Now merge Lat, Lon_ and total cases with ca_df_raw
ca_df_raw["Longitude"] = df_covid_cc_CA.Long_.values
ca_df_raw["Latitude"] = df_covid_cc_CA.Lat.values
ca_df_raw["Total Covid19 Cases"] = df_covid_cc_CA.total_cases.values
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 |
# Now we need to add total number of covid19 deaths per county to the ca_df_raw
# add covid death information for each county.
df_covid_deaths_US = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
df_covid_deaths_CA = df_covid_deaths_US.loc[df_covid_deaths_US['Province_State'] == "California"]
df_covid_deaths_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | Population | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | ... | 1/12/21 | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda, California, US | 1671329 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 776 | 776 | 768 | 757 | 778 | 764 | 765 | 788 | 842 | 856 | 886 | 894 | 896 | 906 | 929 | 937 | 959 | 974 | 980 | 982 | 991 | 999 | 1021 | 1060 | 1037 | 1041 | 1041 | 1051 | 1063 | 1064 | 1088 | 1120 | 1120 | 1121 | 1122 | 1125 | 1126 | 1171 | 1175 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine, California, US | 1129 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador, California, US | 39752 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 28 | 29 | 29 | 29 | 29 | 29 | 29 | 31 | 31 | 32 | 32 | 32 | 32 | 33 | 33 | 33 | 33 | 33 | 33 | 33 | 34 | 34 | 34 | 34 | 35 | 35 | 35 | 36 | 37 | 37 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 39 | 39 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte, California, US | 219186 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 116 | 120 | 120 | 121 | 121 | 121 | 121 | 123 | 128 | 130 | 134 | 134 | 134 | 136 | 136 | 136 | 136 | 138 | 138 | 138 | 142 | 142 | 145 | 145 | 147 | 147 | 147 | 148 | 148 | 148 | 149 | 149 | 149 | 149 | 149 | 153 | 153 | 153 | 156 | 156 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras, California, US | 45905 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 22 | 22 | 22 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 |
5 rows × 408 columns
# we clean the data and just grab the county name in Combined_Key column
df_covid_deaths_CA['Combined_Key'] = df_covid_deaths_CA['Combined_Key'].apply(lambda x: x.split(",")[0])
df_covid_deaths_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | Population | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | ... | 1/12/21 | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda | 1671329 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 776 | 776 | 768 | 757 | 778 | 764 | 765 | 788 | 842 | 856 | 886 | 894 | 896 | 906 | 929 | 937 | 959 | 974 | 980 | 982 | 991 | 999 | 1021 | 1060 | 1037 | 1041 | 1041 | 1051 | 1063 | 1064 | 1088 | 1120 | 1120 | 1121 | 1122 | 1125 | 1126 | 1171 | 1175 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine | 1129 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador | 39752 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 28 | 29 | 29 | 29 | 29 | 29 | 29 | 31 | 31 | 32 | 32 | 32 | 32 | 33 | 33 | 33 | 33 | 33 | 33 | 33 | 34 | 34 | 34 | 34 | 35 | 35 | 35 | 36 | 37 | 37 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 39 | 39 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte | 219186 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 116 | 120 | 120 | 121 | 121 | 121 | 121 | 123 | 128 | 130 | 134 | 134 | 134 | 136 | 136 | 136 | 136 | 138 | 138 | 138 | 142 | 142 | 145 | 145 | 147 | 147 | 147 | 148 | 148 | 148 | 149 | 149 | 149 | 149 | 149 | 153 | 153 | 153 | 156 | 156 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras | 45905 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 22 | 22 | 22 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 |
5 rows × 408 columns
# extract rows 'Out of CA', 'Unassigned'
df_covid_deaths_CA.drop(df_covid_deaths_CA.loc[df_covid_deaths_CA['Combined_Key']=="Out of CA"].index, inplace=True)
df_covid_deaths_CA.drop(df_covid_deaths_CA.loc[df_covid_deaths_CA['Combined_Key']=="Unassigned"].index, inplace=True)
len(df_covid_deaths_CA)
58
# creata total_death row
df_covid_deaths_CA['total_deaths']= df_covid_deaths_CA[df_covid_deaths_CA.columns[-1]]
df_covid_deaths_CA.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | Population | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | 2/1/20 | 2/2/20 | 2/3/20 | 2/4/20 | 2/5/20 | 2/6/20 | 2/7/20 | 2/8/20 | 2/9/20 | 2/10/20 | 2/11/20 | 2/12/20 | 2/13/20 | 2/14/20 | 2/15/20 | 2/16/20 | 2/17/20 | 2/18/20 | ... | 1/13/21 | 1/14/21 | 1/15/21 | 1/16/21 | 1/17/21 | 1/18/21 | 1/19/21 | 1/20/21 | 1/21/21 | 1/22/21 | 1/23/21 | 1/24/21 | 1/25/21 | 1/26/21 | 1/27/21 | 1/28/21 | 1/29/21 | 1/30/21 | 1/31/21 | 2/1/21 | 2/2/21 | 2/3/21 | 2/4/21 | 2/5/21 | 2/6/21 | 2/7/21 | 2/8/21 | 2/9/21 | 2/10/21 | 2/11/21 | 2/12/21 | 2/13/21 | 2/14/21 | 2/15/21 | 2/16/21 | 2/17/21 | 2/18/21 | 2/19/21 | 2/20/21 | total_deaths | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
195 | 84006001 | US | USA | 840 | 6001.0 | Alameda | California | US | 37.646294 | -121.892927 | Alameda | 1671329 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 776 | 776 | 768 | 757 | 778 | 764 | 765 | 788 | 842 | 856 | 886 | 894 | 896 | 906 | 929 | 937 | 959 | 974 | 980 | 982 | 991 | 999 | 1021 | 1060 | 1037 | 1041 | 1041 | 1051 | 1063 | 1064 | 1088 | 1120 | 1120 | 1121 | 1122 | 1125 | 1126 | 1171 | 1175 | 1175 |
196 | 84006003 | US | USA | 840 | 6003.0 | Alpine | California | US | 38.596786 | -119.822359 | Alpine | 1129 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
197 | 84006005 | US | USA | 840 | 6005.0 | Amador | California | US | 38.445831 | -120.656960 | Amador | 39752 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 29 | 29 | 29 | 29 | 29 | 29 | 31 | 31 | 32 | 32 | 32 | 32 | 33 | 33 | 33 | 33 | 33 | 33 | 33 | 34 | 34 | 34 | 34 | 35 | 35 | 35 | 36 | 37 | 37 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 39 | 39 | 39 |
198 | 84006007 | US | USA | 840 | 6007.0 | Butte | California | US | 39.667278 | -121.600525 | Butte | 219186 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 120 | 120 | 121 | 121 | 121 | 121 | 123 | 128 | 130 | 134 | 134 | 134 | 136 | 136 | 136 | 136 | 138 | 138 | 138 | 142 | 142 | 145 | 145 | 147 | 147 | 147 | 148 | 148 | 148 | 149 | 149 | 149 | 149 | 149 | 153 | 153 | 153 | 156 | 156 | 156 |
199 | 84006009 | US | USA | 840 | 6009.0 | Calaveras | California | US | 38.205371 | -120.552913 | Calaveras | 45905 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 22 | 22 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 | 25 |
5 rows × 409 columns
# Check total deaths CA
df_covid_deaths_CA["total_deaths"].sum()
49120
# Now merge total deaths with df_raw
ca_df_raw["Total Covid19 Deaths"] = df_covid_deaths_CA.total_deaths.values
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | Total Covid19 Deaths | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 |
# import hospital data
hospital_raw = pd.read_csv("https://data.ca.gov/dataset/529ac907-6ba1-4cb7-9aae-8966fc96aeef/resource/42d33765-20fd-44b8-a978-b083b7542225/download/hospitals_by_county.csv")
hospital_raw
county | todays_date | hospitalized_covid_confirmed_patients | hospitalized_suspected_covid_patients | hospitalized_covid_patients | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | |
---|---|---|---|---|---|---|---|---|---|
0 | San Mateo | 2020-03-29 | 46.0 | 27.0 | NaN | NaN | 16.0 | 10.0 | 28.0 |
1 | Humboldt | 2020-03-29 | 1.0 | 10.0 | NaN | NaN | 0.0 | 0.0 | 18.0 |
2 | San Francisco | 2020-03-29 | 50.0 | 73.0 | NaN | NaN | 24.0 | 10.0 | 61.0 |
3 | Santa Cruz | 2020-03-29 | 0.0 | 0.0 | NaN | NaN | 0.0 | 0.0 | NaN |
4 | San Luis Obispo | 2020-03-29 | 7.0 | 12.0 | NaN | NaN | 3.0 | 4.0 | 948.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
18416 | Plumas | 2021-02-20 | 0.0 | 0.0 | 0.0 | 35.0 | 0.0 | 0.0 | 0.0 |
18417 | Trinity | 2021-02-20 | 0.0 | 0.0 | 0.0 | 25.0 | 0.0 | 0.0 | 0.0 |
18418 | Orange | 2021-02-20 | 577.0 | 32.0 | 609.0 | 5839.0 | 190.0 | 1.0 | 122.0 |
18419 | San Francisco | 2021-02-20 | 89.0 | 5.0 | 94.0 | 2112.0 | 28.0 | 0.0 | 87.0 |
18420 | San Bernardino | 2021-02-20 | 433.0 | 37.0 | 470.0 | 3510.0 | 127.0 | 2.0 | 102.0 |
18421 rows × 9 columns
# First we need to change todays_date column to a datetime column
hospital_raw['todays_date'] = pd.to_datetime(hospital_raw['todays_date'])
most_recent_date = hospital_raw["todays_date"].max()
most_recent_date.date()
# hospital_raw = hospital_raw.loc[hospital_raw['todays_date'] == most_recent_date ]
# hospital_raw
x = hospital_raw.groupby(by=["county"]).sum()
len(x.index)
lst3 = list(x.index)
new_list1 = [
list(set(lst2).difference(lst3))
]
new_list1
# No hospital data from Alpine and Sierra
[['Alpine', 'Sierra']]
# we grab the data from the most recent date
hospital_raw = hospital_raw.loc[hospital_raw['todays_date'] == most_recent_date ]
# create a columns list
columns_lst = list(hospital_raw.columns)
# create an empty list to add the cities that don't have hospital data.
empty_list = []
# loop over new_list1[0]
for i in new_list1[0]:
data = [str(i),most_recent_date,None,None,None,None,None,None,None]
empty_list.append(data)
# create a second dataframe
hospital_raw_df2 = pd.DataFrame(empty_list,columns=columns_lst)
# append the second dataframe to hospital_raw
hospital_raw = hospital_raw.append(hospital_raw_df2, ignore_index=True)
# we sort the hospital data by county name
hospital_raw = hospital_raw.sort_values("county")
# length should be 58
len(hospital_raw)
58
# check hospital data now
hospital_raw.head()
county | todays_date | hospitalized_covid_confirmed_patients | hospitalized_suspected_covid_patients | hospitalized_covid_patients | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | |
---|---|---|---|---|---|---|---|---|---|
38 | Alameda | 2021-02-20 | 176 | 26 | 202 | 2481 | 58 | 1 | 78 |
56 | Alpine | 2021-02-20 | None | None | None | None | None | None | None |
35 | Amador | 2021-02-20 | 6 | 2 | 8 | 52 | 1 | 1 | 0 |
46 | Butte | 2021-02-20 | 11 | 0 | 11 | 451 | 2 | 0 | 11 |
43 | Calaveras | 2021-02-20 | 2 | 0 | 2 | 33 | 1 | 0 | 7 |
# now we add the hospital data to our original ca_df_raw
# Now merge the hospital data to each county in ca_df_raw
ca_df_raw["todays_date"] = hospital_raw.todays_date.values
ca_df_raw["hospitalized_covid_confirmed_patients"] = hospital_raw.hospitalized_covid_confirmed_patients.values
ca_df_raw["hospitalized_suspected_covid_patients"] = hospital_raw.hospitalized_suspected_covid_patients.values
ca_df_raw["hospitalized_covid_patients"] = hospital_raw.hospitalized_covid_patients.values
ca_df_raw["all_hospital_beds"] = hospital_raw.all_hospital_beds.values
ca_df_raw["icu_covid_confirmed_patients"] = hospital_raw.icu_covid_confirmed_patients.values
ca_df_raw["icu_suspected_covid_patients"] = hospital_raw.icu_suspected_covid_patients.values
ca_df_raw["icu_available_beds"] = hospital_raw.icu_available_beds.values
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | Total Covid19 Deaths | todays_date | hospitalized_covid_confirmed_patients | hospitalized_suspected_covid_patients | hospitalized_covid_patients | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 | 2021-02-20 | 176 | 26 | 202 | 2481 | 58 | 1 | 78 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | None | None | None | None | None | None | None |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 | 2021-02-20 | 6 | 2 | 8 | 52 | 1 | 1 | 0 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 | 2021-02-20 | 11 | 0 | 11 | 451 | 2 | 0 | 11 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 | 2021-02-20 | 2 | 0 | 2 | 33 | 1 | 0 | 7 |
# cleaned data as of now
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | Total Covid19 Deaths | todays_date | hospitalized_covid_confirmed_patients | hospitalized_suspected_covid_patients | hospitalized_covid_patients | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 | 2021-02-20 | 176 | 26 | 202 | 2481 | 58 | 1 | 78 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | None | None | None | None | None | None | None |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 | 2021-02-20 | 6 | 2 | 8 | 52 | 1 | 1 | 0 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 | 2021-02-20 | 11 | 0 | 11 | 451 | 2 | 0 | 11 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 | 2021-02-20 | 2 | 0 | 2 | 33 | 1 | 0 | 7 |
# We grab the population data from the year 2020.
# cities_pop = pd.read_html('https://www.california-demographics.com/counties_by_population', header = 0)
cities_pop = pd.read_html('https://worldpopulationreview.com/us-counties/states/ca', header = 0)
df_pop = cities_pop[0]
df_pop = df_pop.rename(columns={"2021 Population": "Population"})
city_lst = []
for i in df_pop.Name.values:
spl_string = i.split()
rm = spl_string[:-1]
listToStr = ' '.join([str(elem) for elem in rm])
city_lst.append(listToStr)
df_pop['county'] = city_lst
df_pop = df_pop.drop(columns=['Name'],axis=1)
df_pop
# # We create a dictionary where keys are city and the value is the population number.
pop_dict = pd.Series(df_pop.Population.values,index=df_pop.county).to_dict()
# pop_dict.pop('United States Census Bureau. Annual Estimates of the Resident Population: April 1, 2010 to July 1, 2019. U.S. Census Bureau, Population Division. Web. May 2020. http://www.census.gov/. United States Census Bureau. B01001 SEX BY AGE, 2019 American Community Survey 5-Year Estimates. U.S. Census Bureau, American Community Survey Office. Web. 10 December 2020.')
pop_dict
{'Alameda': 1680480, 'Alpine': 1209, 'Amador': 40446, 'Butte': 196880, 'Calaveras': 46319, 'Colusa': 21805, 'Contra Costa': 1159540, 'Del Norte': 27956, 'El Dorado': 197037, 'Fresno': 1013400, 'Glenn': 29245, 'Humboldt': 134186, 'Imperial': 180599, 'Inyo': 18225, 'Kern': 913090, 'Kings': 156056, 'Lake': 64524, 'Lassen': 30483, 'Los Angeles': 9969510, 'Madera': 158217, 'Marin': 257154, 'Mariposa': 16799, 'Mendocino': 85445, 'Merced': 284738, 'Modoc': 8923, 'Mono': 14526, 'Monterey': 434283, 'Napa': 135654, 'Nevada': 100249, 'Orange': 3175130, 'Placer': 410327, 'Plumas': 18939, 'Riverside': 2520060, 'Sacramento': 1578680, 'San Benito': 65490, 'San Bernardino': 2206750, 'San Diego': 3347270, 'San Francisco': 883255, 'San Joaquin': 781462, 'San Luis Obispo': 282625, 'San Mateo': 762357, 'Santa Barbara': 447937, 'Santa Clara': 1918880, 'Santa Cruz': 271957, 'Shasta': 180822, 'Sierra': 3021, 'Siskiyou': 43517, 'Solano': 451479, 'Sonoma': 485722, 'Stanislaus': 555728, 'Sutter': 98217, 'Tehama': 67216, 'Trinity': 11721, 'Tulare': 469407, 'Tuolumne': 54660, 'Ventura': 841734, 'Yolo': 221264, 'Yuba': 80890}
# We create a list of the locations in the dataset
df_loc_lst = list(ca_df_raw.County.values)
pop_lst = []
# Iterated of the list of locations in the dataset.
for i in df_loc_lst:
try:
# Append the population to the pop_lst
pop_lst.append(pop_dict[i])
except KeyError:
# If it doesn't have the city in the data that we scraped we just add 0
pop_lst.append(0)
continue
# Create a new column
ca_df_raw["population"] = pop_lst
ca_df_raw.head()
State | State Abbreviation | County | FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | Total Covid19 Deaths | todays_date | hospitalized_covid_confirmed_patients | hospitalized_suspected_covid_patients | hospitalized_covid_patients | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | population | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 | 2021-02-20 | 176 | 26 | 202 | 2481 | 58 | 1 | 78 | 1680480 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | None | None | None | None | None | None | None | 1209 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 | 2021-02-20 | 6 | 2 | 8 | 52 | 1 | 1 | 0 | 40446 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 | 2021-02-20 | 11 | 0 | 11 | 451 | 2 | 0 | 11 | 196880 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 | 2021-02-20 | 2 | 0 | 2 | 33 | 1 | 0 | 7 | 46319 |
# check df data types
ca_df_raw.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 58 entries, 186 to 243 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 State 58 non-null object 1 State Abbreviation 58 non-null object 2 County 58 non-null object 3 FIPS (5-digit) 58 non-null int64 4 THEME 1: Socioeconomic Status 58 non-null float64 5 THEME 2: Household Composition & Disability 58 non-null float64 6 THEME 3: Minority Status & Language 58 non-null float64 7 THEME 4: Housing Type & Transportation 58 non-null float64 8 THEME 5: Epidemiological Factors 58 non-null float64 9 THEME 6: Healthcare System Factors 58 non-null float64 10 CCVI SCORE Higher = More Vulnerable 58 non-null float64 11 Longitude 58 non-null float64 12 Latitude 58 non-null float64 13 Total Covid19 Cases 58 non-null int64 14 Total Covid19 Deaths 58 non-null int64 15 todays_date 58 non-null datetime64[ns] 16 hospitalized_covid_confirmed_patients 56 non-null object 17 hospitalized_suspected_covid_patients 56 non-null object 18 hospitalized_covid_patients 56 non-null object 19 all_hospital_beds 56 non-null object 20 icu_covid_confirmed_patients 56 non-null object 21 icu_suspected_covid_patients 56 non-null object 22 icu_available_beds 56 non-null object 23 population 58 non-null int64 dtypes: datetime64[ns](1), float64(9), int64(4), object(10) memory usage: 11.3+ KB
# run basic stats
ca_df_raw.describe()
FIPS (5-digit) | THEME 1: Socioeconomic Status | THEME 2:\nHousehold Composition & Disability | THEME 3: \nMinority Status & Language | THEME 4: \nHousing Type & Transportation | THEME 5: Epidemiological Factors | THEME 6: Healthcare System Factors | CCVI SCORE\nHigher = More Vulnerable | Longitude | Latitude | Total Covid19 Cases | Total Covid19 Deaths | population | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 58.000000 | 5.800000e+01 | 58.000000 | 5.800000e+01 |
mean | 6058.000000 | 0.508414 | 0.346638 | 0.845431 | 0.608897 | 0.160224 | 0.033810 | 0.252017 | -120.728594 | 37.843962 | 6.081617e+04 | 846.896552 | 6.829913e+05 |
std | 33.773757 | 0.269966 | 0.271088 | 0.167797 | 0.232298 | 0.188195 | 0.034613 | 0.191745 | 1.922509 | 2.148394 | 1.648209e+05 | 2677.288290 | 1.462337e+06 |
min | 6001.000000 | 0.016000 | 0.007000 | 0.336000 | 0.121000 | 0.006000 | 0.000000 | 0.008000 | -123.897406 | 33.034846 | 8.000000e+01 | 0.000000 | 1.209000e+03 |
25% | 6029.500000 | 0.254000 | 0.109750 | 0.769250 | 0.463250 | 0.057000 | 0.006000 | 0.098750 | -122.031429 | 36.641646 | 3.165250e+03 | 34.500000 | 4.840425e+04 |
50% | 6058.000000 | 0.559000 | 0.312500 | 0.940500 | 0.639000 | 0.103500 | 0.021000 | 0.203000 | -120.918298 | 38.048832 | 1.280850e+04 | 172.000000 | 1.888510e+05 |
75% | 6086.500000 | 0.748500 | 0.593000 | 0.969500 | 0.778000 | 0.163000 | 0.060500 | 0.361250 | -119.843603 | 39.241624 | 4.602025e+04 | 636.500000 | 7.106998e+05 |
max | 6115.000000 | 0.974000 | 0.892000 | 0.997000 | 0.989000 | 0.843000 | 0.161000 | 0.812000 | -115.366904 | 41.742283 | 1.179149e+06 | 19801.000000 | 9.969510e+06 |
ca_df_raw.columns
Index(['State', 'State Abbreviation', 'County', 'FIPS (5-digit)', 'THEME 1: Socioeconomic Status', 'THEME 2:\nHousehold Composition & Disability', 'THEME 3: \nMinority Status & Language', 'THEME 4: \nHousing Type & Transportation', 'THEME 5: Epidemiological Factors', 'THEME 6: Healthcare System Factors', 'CCVI SCORE\nHigher = More Vulnerable', 'Longitude', 'Latitude', 'Total Covid19 Cases', 'Total Covid19 Deaths', 'todays_date', 'hospitalized_covid_confirmed_patients', 'hospitalized_suspected_covid_patients', 'hospitalized_covid_patients', 'all_hospital_beds', 'icu_covid_confirmed_patients', 'icu_suspected_covid_patients', 'icu_available_beds', 'population'], dtype='object')
# rename cols for easier referencing
ca_df_raw.rename(columns={
'State': 'state',
'State Abbreviation': 'abbreviation',
'County': 'county',
'FIPS (5-digit)': 'fips',
'THEME 1: Socioeconomic Status':'t1_socioecon',
'THEME 2:\nHousehold Composition & Disability': 't2_household',
'THEME 3: \nMinority Status & Language' : 't3_minority',
'THEME 4: \nHousing Type & Transportation' : 't4_housing',
'THEME 5: Epidemiological Factors' : 'epidim',
'THEME 6: Healthcare System Factors' : 'healthcare_sys',
'CCVI SCORE\nHigher = More Vulnerable' : 'ccvi_score',
'Longitude' : 'long',
'Latitude' : 'lat',
'Total Covid19 Cases': 'covid_total',
'Total Covid19 Deaths' : 'covid_deaths',
'hospitalized_covid_confirmed_patients' : 'covid_confirmed',
'hospitalized_suspected_covid_patients' : 'covid_suspected',
'hospitalized_covid_patients' : 'covid_total_hospitalized'},
inplace = True, errors='raise')
# add columns for per 100k
ca_df_raw['covid_cases_per_100k'] = ca_df_raw.apply(lambda x: (x.covid_total/x.population * 100000), axis = 1)
ca_df_raw['covid_deaths_per_100k'] = ca_df_raw.apply(lambda x: (x.covid_deaths/x.population * 100000), axis = 1)
# check # missing values
ca_df_raw.isnull().sum()
state 0 abbreviation 0 county 0 fips 0 t1_socioecon 0 t2_household 0 t3_minority 0 t4_housing 0 epidim 0 healthcare_sys 0 ccvi_score 0 long 0 lat 0 covid_total 0 covid_deaths 0 todays_date 0 covid_confirmed 2 covid_suspected 2 covid_total_hospitalized 2 all_hospital_beds 2 icu_covid_confirmed_patients 2 icu_suspected_covid_patients 2 icu_available_beds 2 population 0 covid_cases_per_100k 0 covid_deaths_per_100k 0 dtype: int64
ca_df_raw.loc[(ca_df_raw.covid_total < 100)]
state | abbreviation | county | fips | t1_socioecon | t2_household | t3_minority | t4_housing | epidim | healthcare_sys | ccvi_score | long | lat | covid_total | covid_deaths | todays_date | covid_confirmed | covid_suspected | covid_total_hospitalized | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | population | covid_cases_per_100k | covid_deaths_per_100k | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | None | None | None | None | None | None | None | 1209 | 6617.038875 | 0.0 |
231 | California | CA | Sierra | 6091 | 0.149 | 0.101 | 0.336 | 0.587 | 0.244 | 0.005 | 0.017 | -120.520070 | 39.577135 | 99 | 0 | 2021-02-20 | None | None | None | None | None | None | None | 3021 | 3277.060576 | 0.0 |
# convert "None" values to numeric
ca_df_raw.fillna(0)
state | abbreviation | county | fips | t1_socioecon | t2_household | t3_minority | t4_housing | epidim | healthcare_sys | ccvi_score | long | lat | covid_total | covid_deaths | todays_date | covid_confirmed | covid_suspected | covid_total_hospitalized | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | population | covid_cases_per_100k | covid_deaths_per_100k | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 | 2021-02-20 | 176.0 | 26.0 | 202.0 | 2481.0 | 58.0 | 1.0 | 78.0 | 1680480 | 4729.065505 | 69.920499 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1209 | 6617.038875 | 0.000000 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 | 2021-02-20 | 6.0 | 2.0 | 8.0 | 52.0 | 1.0 | 1.0 | 0.0 | 40446 | 8473.025763 | 96.424863 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 | 2021-02-20 | 11.0 | 0.0 | 11.0 | 451.0 | 2.0 | 0.0 | 11.0 | 196880 | 5497.257213 | 79.236083 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 | 2021-02-20 | 2.0 | 0.0 | 2.0 | 33.0 | 1.0 | 0.0 | 7.0 | 46319 | 4097.670502 | 53.973531 |
191 | California | CA | Colusa | 6011 | 0.690 | 0.577 | 0.989 | 0.888 | 0.067 | 0.007 | 0.420 | -122.233173 | 39.178820 | 2117 | 13 | 2021-02-20 | 7.0 | 0.0 | 7.0 | 48.0 | 0.0 | 0.0 | 5.0 | 21805 | 9708.782389 | 59.619353 |
192 | California | CA | Contra Costa | 6013 | 0.197 | 0.112 | 0.941 | 0.259 | 0.114 | 0.014 | 0.095 | -121.928953 | 37.919235 | 61469 | 654 | 2021-02-20 | 78.0 | 10.0 | 88.0 | 1303.0 | 30.0 | 0.0 | 36.0 | 1159540 | 5301.153906 | 56.401677 |
193 | California | CA | Del Norte | 6015 | 0.791 | 0.598 | 0.834 | 0.921 | 0.070 | 0.046 | 0.359 | -123.897406 | 41.742283 | 959 | 5 | 2021-02-20 | 2.0 | 4.0 | 6.0 | 52.0 | 0.0 | 0.0 | 2.0 | 27956 | 3430.390614 | 17.885248 |
194 | California | CA | El Dorado | 6017 | 0.145 | 0.142 | 0.663 | 0.149 | 0.103 | 0.005 | 0.021 | -120.523317 | 38.779660 | 9017 | 93 | 2021-02-20 | 6.0 | 0.0 | 6.0 | 190.0 | 2.0 | 0.0 | 8.0 | 197037 | 4576.297853 | 47.199257 |
195 | California | CA | Fresno | 6019 | 0.810 | 0.556 | 0.981 | 0.668 | 0.055 | 0.085 | 0.457 | -119.646695 | 36.757339 | 93973 | 1367 | 2021-02-20 | 244.0 | 32.0 | 276.0 | 1512.0 | 63.0 | 4.0 | 19.0 | 1013400 | 9273.041247 | 134.892441 |
196 | California | CA | Glenn | 6021 | 0.875 | 0.753 | 0.968 | 0.676 | 0.136 | 0.161 | 0.644 | -122.393845 | 39.598759 | 2199 | 23 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 47.0 | 0.0 | 0.0 | 0.0 | 29245 | 7519.234057 | 78.645922 |
197 | California | CA | Humboldt | 6023 | 0.641 | 0.376 | 0.713 | 0.718 | 0.654 | 0.056 | 0.457 | -123.876044 | 40.699230 | 3067 | 32 | 2021-02-20 | 10.0 | 4.0 | 14.0 | 273.0 | 0.0 | 1.0 | 7.0 | 134186 | 2285.633375 | 23.847495 |
198 | California | CA | Imperial | 6025 | 0.974 | 0.675 | 0.997 | 0.880 | 0.106 | 0.001 | 0.615 | -115.366904 | 33.039308 | 26840 | 584 | 2021-02-20 | 28.0 | 1.0 | 29.0 | 234.0 | 11.0 | 0.0 | 3.0 | 180599 | 14861.654826 | 323.368346 |
199 | California | CA | Inyo | 6027 | 0.332 | 0.322 | 0.864 | 0.772 | 0.160 | 0.005 | 0.214 | -117.411198 | 36.511121 | 1243 | 34 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 29.0 | 0.0 | 0.0 | 1.0 | 18225 | 6820.301783 | 186.556927 |
200 | California | CA | Kern | 6029 | 0.827 | 0.334 | 0.965 | 0.619 | 0.044 | 0.001 | 0.283 | -118.727780 | 35.343294 | 101613 | 761 | 2021-02-20 | 167.0 | 7.0 | 174.0 | 1165.0 | 51.0 | 0.0 | 31.0 | 913090 | 11128.475835 | 83.343373 |
201 | California | CA | Kings | 6031 | 0.786 | 0.578 | 0.974 | 0.579 | 0.317 | 0.113 | 0.593 | -119.815997 | 36.074091 | 21853 | 214 | 2021-02-20 | 45.0 | 0.0 | 45.0 | 135.0 | 9.0 | 0.0 | 9.0 | 156056 | 14003.306505 | 137.130261 |
202 | California | CA | Lake | 6033 | 0.749 | 0.852 | 0.863 | 0.915 | 0.843 | 0.070 | 0.812 | -122.753624 | 39.101243 | 3078 | 40 | 2021-02-20 | 1.0 | 0.0 | 1.0 | 63.0 | 0.0 | 0.0 | 4.0 | 64524 | 4770.318021 | 61.992437 |
203 | California | CA | Lassen | 6035 | 0.599 | 0.166 | 0.767 | 0.576 | 0.006 | 0.077 | 0.106 | -120.593510 | 40.673113 | 5558 | 19 | 2021-02-20 | 1.0 | 0.0 | 1.0 | 25.0 | 0.0 | 0.0 | 0.0 | 30483 | 18233.113539 | 62.329823 |
204 | California | CA | Los Angeles | 6037 | 0.589 | 0.080 | 0.983 | 0.647 | 0.112 | 0.004 | 0.214 | -118.228241 | 34.308284 | 1179149 | 19801 | 2021-02-20 | 2213.0 | 212.0 | 2425.0 | 19279.0 | 679.0 | 34.0 | 438.0 | 9969510 | 11827.552207 | 198.615579 |
205 | California | CA | Madera | 6039 | 0.846 | 0.469 | 0.966 | 0.570 | 0.079 | 0.022 | 0.362 | -119.766559 | 37.215140 | 15222 | 201 | 2021-02-20 | 35.0 | 8.0 | 43.0 | 278.0 | 10.0 | 0.0 | 5.0 | 158217 | 9620.963613 | 127.040710 |
206 | California | CA | Marin | 6041 | 0.016 | 0.069 | 0.785 | 0.237 | 0.136 | 0.018 | 0.021 | -122.721063 | 38.071225 | 13011 | 194 | 2021-02-20 | 14.0 | 4.0 | 18.0 | 327.0 | 6.0 | 0.0 | 14.0 | 257154 | 5059.614083 | 75.441175 |
207 | California | CA | Mariposa | 6043 | 0.592 | 0.335 | 0.643 | 0.394 | 0.189 | 0.021 | 0.134 | -119.907333 | 37.579786 | 389 | 7 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 14.0 | 0.0 | 0.0 | 0.0 | 16799 | 2315.614025 | 41.669147 |
208 | California | CA | Mendocino | 6045 | 0.697 | 0.706 | 0.895 | 0.897 | 0.094 | 0.037 | 0.396 | -123.391131 | 39.438119 | 3781 | 43 | 2021-02-20 | 6.0 | 1.0 | 7.0 | 84.0 | 2.0 | 0.0 | 3.0 | 85445 | 4425.068758 | 50.324770 |
209 | California | CA | Merced | 6047 | 0.942 | 0.628 | 0.989 | 0.638 | 0.024 | 0.062 | 0.512 | -120.720903 | 37.189224 | 28489 | 388 | 2021-02-20 | 32.0 | 1.0 | 33.0 | 272.0 | 12.0 | 0.0 | 4.0 | 284738 | 10005.338241 | 136.265620 |
210 | California | CA | Modoc | 6049 | 0.751 | 0.512 | 0.801 | 0.826 | 0.218 | 0.015 | 0.347 | -120.724482 | 41.589656 | 448 | 4 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 12.0 | 0.0 | 0.0 | 0.0 | 8923 | 5020.732937 | 44.827973 |
211 | California | CA | Mono | 6051 | 0.203 | 0.007 | 0.861 | 0.754 | 0.017 | 0.033 | 0.018 | -118.887241 | 37.938993 | 1206 | 4 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 17.0 | 0.0 | 0.0 | 1.0 | 14526 | 8302.354399 | 27.536831 |
212 | California | CA | Monterey | 6053 | 0.579 | 0.116 | 0.982 | 0.659 | 0.063 | 0.069 | 0.267 | -121.241340 | 36.218624 | 41884 | 315 | 2021-02-20 | 59.0 | 0.0 | 59.0 | 720.0 | 8.0 | 0.0 | 18.0 | 434283 | 9644.402383 | 72.533348 |
213 | California | CA | Napa | 6055 | 0.236 | 0.143 | 0.947 | 0.656 | 0.118 | 0.018 | 0.150 | -122.332839 | 38.507358 | 8899 | 67 | 2021-02-20 | 7.0 | 0.0 | 7.0 | 207.0 | 2.0 | 0.0 | 5.0 | 135654 | 6560.071948 | 49.390361 |
214 | California | CA | Nevada | 6057 | 0.200 | 0.182 | 0.432 | 0.141 | 0.715 | 0.002 | 0.096 | -120.762728 | 39.303948 | 3863 | 74 | 2021-02-20 | 2.0 | 0.0 | 2.0 | 115.0 | 1.0 | 0.0 | 2.0 | 100249 | 3853.405021 | 73.816198 |
215 | California | CA | Orange | 6059 | 0.284 | 0.032 | 0.961 | 0.467 | 0.582 | 0.000 | 0.213 | -117.764600 | 33.701475 | 258991 | 3770 | 2021-02-20 | 577.0 | 32.0 | 609.0 | 5839.0 | 190.0 | 1.0 | 122.0 | 3175130 | 8156.862869 | 118.735296 |
216 | California | CA | Placer | 6061 | 0.073 | 0.085 | 0.741 | 0.121 | 0.054 | 0.002 | 0.008 | -120.724057 | 39.061672 | 19520 | 229 | 2021-02-20 | 64.0 | 17.0 | 81.0 | 772.0 | 21.0 | 0.0 | 13.0 | 410327 | 4757.181467 | 55.809147 |
217 | California | CA | Plumas | 6063 | 0.242 | 0.455 | 0.551 | 0.610 | 0.196 | 0.075 | 0.140 | -120.839524 | 40.003560 | 649 | 6 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 35.0 | 0.0 | 0.0 | 0.0 | 18939 | 3426.791277 | 31.680659 |
218 | California | CA | Riverside | 6065 | 0.661 | 0.181 | 0.968 | 0.330 | 0.111 | 0.001 | 0.188 | -115.993358 | 33.743150 | 287534 | 3633 | 2021-02-20 | 455.0 | 56.0 | 511.0 | 3196.0 | 138.0 | 5.0 | 36.0 | 2520060 | 11409.807703 | 144.163234 |
219 | California | CA | Sacramento | 6067 | 0.492 | 0.215 | 0.950 | 0.436 | 0.118 | 0.008 | 0.190 | -121.342537 | 38.451068 | 91815 | 1431 | 2021-02-20 | 177.0 | 13.0 | 190.0 | 2508.0 | 50.0 | 2.0 | 80.0 | 1578680 | 5815.934832 | 90.645349 |
220 | California | CA | San Benito | 6069 | 0.539 | 0.164 | 0.981 | 0.175 | 0.078 | 0.069 | 0.193 | -121.069975 | 36.603082 | 5699 | 57 | 2021-02-20 | 9.0 | 0.0 | 9.0 | 25.0 | 3.0 | 0.0 | 0.0 | 65490 | 8702.091922 | 87.036189 |
221 | California | CA | San Bernardino | 6071 | 0.747 | 0.215 | 0.973 | 0.440 | 0.066 | 0.003 | 0.222 | -116.177469 | 34.840603 | 284280 | 2557 | 2021-02-20 | 433.0 | 37.0 | 470.0 | 3510.0 | 127.0 | 2.0 | 102.0 | 2206750 | 12882.292965 | 115.871757 |
222 | California | CA | San Diego | 6073 | 0.353 | 0.069 | 0.940 | 0.477 | 0.077 | 0.000 | 0.084 | -116.736533 | 33.034846 | 256513 | 3188 | 2021-02-20 | 636.0 | 36.0 | 672.0 | 6462.0 | 205.0 | 9.0 | 219.0 | 3347270 | 7663.349536 | 95.241794 |
223 | California | CA | San Francisco | 6075 | 0.130 | 0.007 | 0.964 | 0.814 | 0.138 | 0.021 | 0.083 | -122.438567 | 37.752151 | 33746 | 394 | 2021-02-20 | 89.0 | 5.0 | 94.0 | 2112.0 | 28.0 | 0.0 | 87.0 | 883255 | 3820.640698 | 44.607729 |
224 | California | CA | San Joaquin | 6077 | 0.711 | 0.374 | 0.980 | 0.538 | 0.089 | 0.007 | 0.304 | -121.273006 | 37.934337 | 65891 | 1026 | 2021-02-20 | 126.0 | 25.0 | 151.0 | 906.0 | 45.0 | 5.0 | 14.0 | 781462 | 8431.759958 | 131.292373 |
225 | California | CA | San Luis Obispo | 6079 | 0.190 | 0.112 | 0.824 | 0.590 | 0.032 | 0.049 | 0.055 | -120.403903 | 35.388220 | 19340 | 217 | 2021-02-20 | 22.0 | 3.0 | 25.0 | 460.0 | 4.0 | 0.0 | 22.0 | 282625 | 6842.989828 | 76.780186 |
226 | California | CA | San Mateo | 6081 | 0.071 | 0.017 | 0.958 | 0.458 | 0.080 | 0.061 | 0.053 | -122.327555 | 37.422881 | 38256 | 497 | 2021-02-20 | 64.0 | 6.0 | 70.0 | 720.0 | 19.0 | 2.0 | 24.0 | 762357 | 5018.121431 | 65.192554 |
227 | California | CA | Santa Barbara | 6083 | 0.439 | 0.104 | 0.951 | 0.752 | 0.046 | 0.027 | 0.163 | -120.018849 | 34.653295 | 31432 | 394 | 2021-02-20 | 83.0 | 2.0 | 85.0 | 603.0 | 19.0 | 0.0 | 32.0 | 447937 | 7017.058202 | 87.958798 |
228 | California | CA | Santa Clara | 6085 | 0.123 | 0.018 | 0.974 | 0.462 | 0.030 | 0.064 | 0.065 | -121.697046 | 37.231049 | 108800 | 1728 | 2021-02-20 | 228.0 | 15.0 | 243.0 | 3053.0 | 71.0 | 1.0 | 59.0 | 1918880 | 5669.974152 | 90.052531 |
229 | California | CA | Santa Cruz | 6087 | 0.323 | 0.067 | 0.882 | 0.752 | 0.034 | 0.063 | 0.114 | -122.006652 | 37.055803 | 14438 | 172 | 2021-02-20 | 25.0 | 2.0 | 27.0 | 372.0 | 5.0 | 0.0 | 6.0 | 271957 | 5308.927514 | 63.245292 |
230 | California | CA | Shasta | 6089 | 0.507 | 0.641 | 0.604 | 0.701 | 0.164 | 0.073 | 0.251 | -122.039688 | 40.763914 | 10868 | 172 | 2021-02-20 | 18.0 | 0.0 | 18.0 | 567.0 | 1.0 | 0.0 | 21.0 | 180822 | 6010.330601 | 95.121169 |
231 | California | CA | Sierra | 6091 | 0.149 | 0.101 | 0.336 | 0.587 | 0.244 | 0.005 | 0.017 | -120.520070 | 39.577135 | 99 | 0 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3021 | 3277.060576 | 0.000000 |
232 | California | CA | Siskiyou | 6093 | 0.613 | 0.760 | 0.592 | 0.903 | 0.194 | 0.050 | 0.345 | -122.540743 | 41.591989 | 1701 | 13 | 2021-02-20 | 1.0 | 0.0 | 1.0 | 56.0 | 1.0 | 0.0 | 5.0 | 43517 | 3908.817244 | 29.873383 |
233 | California | CA | Solano | 6095 | 0.343 | 0.200 | 0.952 | 0.268 | 0.135 | 0.006 | 0.144 | -121.935711 | 38.268274 | 29766 | 161 | 2021-02-20 | 70.0 | 12.0 | 82.0 | 582.0 | 17.0 | 1.0 | 11.0 | 451479 | 6592.997681 | 35.660573 |
234 | California | CA | Sonoma | 6097 | 0.244 | 0.119 | 0.901 | 0.640 | 0.104 | 0.010 | 0.101 | -122.886251 | 38.527464 | 27606 | 291 | 2021-02-20 | 38.0 | 1.0 | 39.0 | 656.0 | 13.0 | 0.0 | 22.0 | 485722 | 5683.497968 | 59.910813 |
235 | California | CA | Stanislaus | 6099 | 0.797 | 0.487 | 0.970 | 0.570 | 0.060 | 0.020 | 0.346 | -120.997072 | 37.558625 | 55081 | 926 | 2021-02-20 | 155.0 | 15.0 | 170.0 | 1305.0 | 34.0 | 1.0 | 23.0 | 555728 | 9911.503469 | 166.628279 |
236 | California | CA | Sutter | 6101 | 0.788 | 0.689 | 0.972 | 0.806 | 0.037 | 0.009 | 0.399 | -121.694590 | 39.034175 | 8766 | 94 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 14.0 | 0.0 | 0.0 | 0.0 | 98217 | 8925.135160 | 95.706446 |
237 | California | CA | Tehama | 6103 | 0.848 | 0.877 | 0.846 | 0.897 | 0.115 | 0.074 | 0.514 | -122.237017 | 40.125709 | 5043 | 52 | 2021-02-20 | 3.0 | 0.0 | 3.0 | 59.0 | 0.0 | 0.0 | 1.0 | 67216 | 7502.677934 | 77.362533 |
238 | California | CA | Trinity | 6105 | 0.675 | 0.892 | 0.526 | 0.927 | 0.749 | 0.059 | 0.659 | -123.114713 | 40.649177 | 340 | 5 | 2021-02-20 | 0.0 | 0.0 | 0.0 | 25.0 | 0.0 | 0.0 | 0.0 | 11721 | 2900.776384 | 42.658476 |
239 | California | CA | Tulare | 6107 | 0.901 | 0.634 | 0.985 | 0.794 | 0.032 | 0.010 | 0.446 | -118.802020 | 36.220266 | 47399 | 720 | 2021-02-20 | 75.0 | 8.0 | 83.0 | 647.0 | 11.0 | 0.0 | 26.0 | 469407 | 10097.633823 | 153.385016 |
240 | California | CA | Tuolumne | 6109 | 0.312 | 0.405 | 0.492 | 0.780 | 0.110 | 0.028 | 0.089 | -119.952509 | 38.026440 | 3870 | 56 | 2021-02-20 | 2.0 | 0.0 | 2.0 | 84.0 | 0.0 | 0.0 | 2.0 | 54660 | 7080.131723 | 102.451518 |
241 | California | CA | Ventura | 6111 | 0.312 | 0.109 | 0.943 | 0.327 | 0.057 | 0.034 | 0.110 | -119.091061 | 34.444657 | 76582 | 782 | 2021-02-20 | 171.0 | 11.0 | 182.0 | 1150.0 | 43.0 | 0.0 | 38.0 | 841734 | 9098.123635 | 92.903459 |
242 | California | CA | Yolo | 6113 | 0.456 | 0.060 | 0.945 | 0.733 | 0.020 | 0.003 | 0.103 | -121.901829 | 38.682789 | 12606 | 181 | 2021-02-20 | 10.0 | 0.0 | 10.0 | 125.0 | 6.0 | 0.0 | 2.0 | 221264 | 5697.266614 | 81.802733 |
243 | California | CA | Yuba | 6115 | 0.745 | 0.644 | 0.916 | 0.852 | 0.037 | 0.014 | 0.324 | -121.353564 | 39.262559 | 5681 | 36 | 2021-02-20 | 21.0 | 1.0 | 22.0 | 261.0 | 4.0 | 0.0 | 4.0 | 80890 | 7023.117814 | 44.504883 |
ca_df_raw.icu_available_beds.describe()
count 56.0 unique 32.0 top 0.0 freq 9.0 Name: icu_available_beds, dtype: float64
covid_total_series = pd.Series(ca_df_raw.covid_total.values,ca_df_raw.county.values)
covid_deaths_series = pd.Series(ca_df_raw.covid_deaths.values,ca_df_raw.county.values)
# import HPI index data
# you can download it from here: https://healthyplacesindex.org/data-reports
df_hpi_raw = pd.read_csv('https://storage.googleapis.com/hpi_index/HPI2_MasterFile_2019-04-24.csv', encoding= 'unicode_escape')
df_hpi_raw
CensusTract | pop2010 | pct2010gq | City | ZIP | County_FIPS | County_Name | UrbanType | hpi2score | hpi2_pctile_pos | hpi2_pctile_neg | hpi_top25pct | quintiles | quartiles | economic | economic_pctile | education | education_pctile | housing | houseing_pctile | healthcareaccess | healthcareaccess_pctile | neighborhood | neighborhood_pctile | pollution | pollution_pctile | transportation | transportation_pctile | social | social_pctile | insured | insured_pctile | uncrowded | uncrowded_pctile | homeownership | homeownership_pctile | automobile | automobile_pctile | commute | commute_pctile | ... | income_pctile | retail | retail_pctile | parkaccess | parkaccess_pctile | treecanopy | treecanopy_pctile | alcoffsale | alcoffsale_pctile | voting | voting_pctile | ownsevere | ownsevere_pctile | rentsevere | rentsevere_pctile | houserepair | houserepair_pctile | twoparents | twoparents_pctile | supermkts | supermkts_pctile | ozone | ozone_pctile | pm25 | pm25_pctile | dieselpm | dieselpm_pctile | h20contam | h20contam_pctile | LEB | LEB_pctile | white_pct | black_pct | asian_pct | latino_pct | multiple_pct | NativeAm_pct | PacificIsl_pct | other_pct | version | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6001400100 | 2937 | 0.1 | Oakland | 94704.0 | 6001 | Alameda | urban_area | 1.182028 | 99.114590 | 0.885410 | Yes | 1.0 | 1.0 | 1.735963 | 98.691133 | 0.948968 | 90.093674 | 0.730552 | 90.003850 | 1.285242 | 93.814962 | 0.832170 | 98.395996 | 0.878125 | 92.377775 | 0.616855 | 95.226485 | 1.336602 | 96.881817 | 95.0 | 93.814962 | 99.3 | 89.464904 | 85.5 | 92.005646 | 98.6 | 87.475940 | 15.284553 | 82.882074 | ... | 99.101758 | 9.587563 | 97.600411 | 100.00000 | 81.355062 | 36.433798 | 97.420762 | 73.169901 | 64.121648 | 82.781682 | 93.224689 | 5.092593 | 90.068010 | 27.272727 | 45.784679 | 100.000000 | 80.816117 | 93.074792 | 92.480431 | 1.319206 | 13.396638 | 0.029592 | 96.086231 | 8.697944 | 74.759399 | 27.436933 | 18.927242 | 70.599583 | 97.202618 | 86.985506 | 96.663673 | 70.752469 | 4.766769 | 15.526047 | 3.983657 | 3.779367 | 0.068097 | 0.272387 | 0.851209 | Wed13Dec17 |
1 | 6001400200 | 1974 | 3.5 | Oakland | 94618.0 | 6001 | Alameda | urban_area | 1.325874 | 99.704863 | 0.295137 | Yes | 1.0 | 1.0 | 1.808761 | 99.037598 | 1.137725 | 94.649044 | 0.959728 | 96.830489 | 1.004971 | 83.985628 | 0.503232 | 94.379571 | 0.657261 | 85.628128 | 1.494366 | 99.653535 | 1.227116 | 94.687540 | 91.6 | 83.985628 | 100.0 | 96.933145 | 63.6 | 59.502117 | 94.1 | 46.028487 | 41.950758 | 96.791993 | ... | 96.650841 | 2.448043 | 82.651097 | 91.79331 | 50.263057 | 19.708371 | 92.467599 | 28.419453 | 26.831772 | 85.492572 | 97.805723 | 3.883495 | 94.187091 | 2.666667 | 97.895547 | 99.386503 | 51.148467 | 87.012987 | 77.993071 | 100.000000 | 94.251251 | 0.029592 | 96.086231 | 8.697944 | 74.759399 | 42.190000 | 5.569100 | 70.599583 | 97.202618 | 86.957616 | 96.612344 | 78.318136 | 1.570415 | 7.396150 | 7.649443 | 4.812563 | 0.101317 | 0.000000 | 0.151976 | Wed13Dec17 |
2 | 6001400300 | 4865 | 0.8 | Oakland | 94618.0 | 6001 | Alameda | urban_area | 0.925666 | 95.932247 | 4.067753 | Yes | 1.0 | 1.0 | 1.014492 | 87.244963 | 1.348314 | 98.165020 | 0.603628 | 84.088284 | 1.268756 | 93.186193 | -0.291977 | 19.940973 | 0.657261 | 85.628128 | 0.822268 | 97.202618 | 1.078500 | 90.966252 | 94.8 | 93.186193 | 100.0 | 96.933145 | 44.3 | 33.850892 | 85.5 | 14.115232 | 37.873988 | 96.047735 | ... | 74.990376 | 1.890912 | 76.478891 | 33.25797 | 15.834723 | 5.838530 | 57.320672 | 16.690647 | 19.312203 | 82.086597 | 91.813166 | 7.359307 | 79.070961 | 12.177122 | 88.771975 | 100.000000 | 80.816117 | 87.221571 | 78.429360 | 92.745177 | 80.354164 | 0.029592 | 96.086231 | 8.697944 | 74.759399 | 42.190000 | 5.569100 | 70.599583 | 97.202618 | 86.462942 | 95.136661 | 66.927030 | 10.524152 | 8.612539 | 8.201439 | 5.056526 | 0.164440 | 0.102775 | 0.411100 | Wed13Dec17 |
3 | 6001400400 | 3703 | 0.9 | Oakland | 94609.0 | 6001 | Alameda | urban_area | 1.117603 | 98.537149 | 1.462851 | Yes | 1.0 | 1.0 | 1.277657 | 93.622482 | 0.939501 | 89.914025 | 0.356477 | 68.381881 | 1.301729 | 94.430900 | 0.070027 | 57.936610 | 0.657261 | 85.628128 | 1.756657 | 99.910176 | 1.295417 | 96.253048 | 95.2 | 94.430900 | 99.2 | 88.258694 | 37.9 | 26.305659 | 92.9 | 38.611574 | 49.724518 | 98.011036 | ... | 86.500706 | 1.828020 | 75.619145 | 84.22900 | 42.730656 | 11.378516 | 82.997562 | 0.000000 | 4.516874 | 83.732539 | 95.175157 | 13.103448 | 42.640832 | 11.926605 | 89.246760 | 99.449036 | 53.432568 | 90.876565 | 88.374182 | 97.844466 | 84.588733 | 0.029592 | 96.086231 | 8.697944 | 74.759399 | 42.190000 | 5.569100 | 70.599583 | 97.202618 | 82.710897 | 68.959322 | 65.460437 | 12.098299 | 7.291385 | 8.965703 | 5.320011 | 0.135026 | 0.162031 | 0.567108 | Wed13Dec17 |
4 | 6001400500 | 3517 | 3.6 | Oakland | 94609.0 | 6001 | Alameda | urban_area | 0.562799 | 83.318363 | 16.681637 | Yes | 1.0 | 1.0 | 0.577803 | 71.859361 | 0.486145 | 75.375337 | 0.029926 | 45.951495 | 0.485643 | 62.953933 | -0.106063 | 34.980110 | 0.657261 | 85.628128 | 0.873072 | 97.536250 | 0.949500 | 86.821506 | 85.3 | 62.953933 | 97.5 | 71.885025 | 36.3 | 24.611831 | 85.4 | 13.935583 | 39.200000 | 96.265880 | ... | 67.676120 | 1.252351 | 64.570769 | 100.00000 | 81.355062 | 5.625453 | 55.357372 | 0.000000 | 4.516874 | 78.010675 | 81.483383 | 19.658120 | 15.526755 | 20.398010 | 69.318619 | 99.685535 | 59.964070 | 88.735632 | 82.676761 | 74.722664 | 68.099577 | 0.029592 | 96.086231 | 8.697944 | 74.759399 | 42.190000 | 5.569100 | 70.599583 | 97.202618 | 80.123238 | 38.855383 | 50.554450 | 26.528291 | 5.914131 | 9.667330 | 6.113165 | 0.454933 | 0.113733 | 0.653966 | Wed13Dec17 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
8052 | 6115040800 | 4233 | 0.1 | NaN | 95692.0 | 6115 | Yuba | urban_cluster | -0.037308 | 47.427178 | 52.572822 | No | 3.0 | 3.0 | -0.124472 | 44.334659 | -0.308684 | 33.902220 | 0.526783 | 79.481586 | 0.543346 | 65.302194 | -0.016575 | 45.528038 | 0.376294 | 69.754908 | -0.149405 | 27.255229 | 0.086923 | 51.520595 | 86.0 | 65.302194 | 95.2 | 55.190556 | 57.7 | 51.148467 | 94.0 | 45.258565 | 3.573592 | 32.221224 | ... | 54.061337 | 0.014779 | 6.197870 | 83.88850 | 42.474015 | 7.773334 | 70.755806 | 63.146704 | 54.587450 | 70.834272 | 56.666239 | 3.067485 | 95.919415 | 17.857143 | 76.697036 | 98.909091 | 38.098293 | 74.953096 | 40.985500 | 36.742738 | 41.267804 | 0.051243 | 32.837162 | 7.859585 | 85.320159 | 3.230171 | 91.415373 | 488.288671 | 48.107276 | 79.340846 | 30.694213 | 71.178833 | 0.803213 | 4.890149 | 16.843846 | 4.535790 | 1.464682 | 0.094496 | 0.188991 | Wed13Dec17 |
8053 | 6115040901 | 2783 | 0.4 | NaN | 95901.0 | 6115 | Yuba | rural | -0.425301 | 24.188374 | 75.811626 | No | 4.0 | 4.0 | -0.651630 | 24.894136 | -0.833172 | 8.327987 | 0.213441 | 58.565379 | -0.553011 | 26.998588 | -0.326614 | 17.759528 | 0.164080 | 55.447196 | -0.052236 | 39.984602 | -0.180950 | 40.164250 | 72.7 | 26.998588 | 95.2 | 55.190556 | 66.0 | 62.979597 | 96.2 | 62.479148 | 2.864816 | 25.381753 | ... | 42.512511 | 0.001401 | 1.270371 | 34.42328 | 16.335173 | 7.625115 | 69.883229 | 96.155228 | 88.771975 | 68.374294 | 48.620557 | 7.153285 | 80.238676 | 40.000000 | 11.651482 | 100.000000 | 80.816117 | 70.948379 | 30.437572 | 0.097277 | 10.535096 | 0.047908 | 42.974464 | 7.440405 | 88.925959 | 2.160608 | 94.084435 | 837.561343 | 7.891698 | 75.506062 | 6.685487 | 65.145526 | 0.718649 | 10.636004 | 17.103845 | 4.275961 | 2.120014 | 0.000000 | 0.000000 | Wed13Dec17 |
8054 | 6115040902 | 1737 | 23.8 | NaN | 95901.0 | 6115 | Yuba | rural | -0.791742 | 7.840370 | 92.159630 | No | 5.0 | 4.0 | -2.426732 | 0.141152 | 0.393432 | 71.679713 | -2.993282 | 0.192480 | 1.392405 | 96.638008 | -0.500766 | 8.571795 | 0.705188 | 87.488772 | 0.446378 | 92.198127 | -0.723688 | 19.556012 | 96.3 | 96.638008 | 99.1 | 87.193635 | 0.7 | 0.513281 | 97.0 | 70.204029 | 13.461538 | 79.776723 | ... | 13.319646 | 0.012797 | 5.735917 | 0.00000 | 2.194277 | 8.214352 | 73.091236 | 100.000000 | 97.010137 | 55.650726 | 14.513025 | 100.000000 | 0.076992 | 25.000000 | 54.202489 | 90.301724 | 0.911074 | 72.049689 | 33.016810 | 0.000000 | 2.399589 | 0.051243 | 32.837162 | 7.440405 | 88.925959 | 0.289451 | 99.204414 | 244.528002 | 75.324009 | 77.786583 | 17.849352 | 65.342545 | 9.038572 | 3.511802 | 14.219919 | 5.757052 | 1.381693 | 0.518135 | 0.230282 | Wed13Dec17 |
8055 | 6115041000 | 7357 | 0.0 | NaN | 95901.0 | 6115 | Yuba | rural | 0.162931 | 59.425125 | 40.574875 | No | 3.0 | 2.0 | 0.108905 | 53.355576 | -0.230091 | 38.816887 | 0.814518 | 93.288849 | 0.642266 | 69.228795 | -0.113310 | 34.300013 | 0.050840 | 48.517901 | 0.006159 | 49.659951 | 0.964945 | 87.398948 | 87.2 | 69.228795 | 96.7 | 65.161042 | 80.9 | 85.859104 | 97.9 | 80.123187 | 1.926101 | 16.450661 | ... | 62.158347 | 0.001712 | 1.488515 | 4.90689 | 6.454510 | 21.815149 | 93.609650 | 99.728150 | 93.404337 | 75.224543 | 72.436802 | 8.425721 | 72.719107 | 12.621359 | 88.117541 | 100.000000 | 80.816117 | 92.452830 | 91.338381 | 9.601637 | 20.775055 | 0.051243 | 32.837162 | 7.440405 | 88.925959 | 1.826254 | 94.661876 | 874.587064 | 5.889901 | 78.128151 | 20.531246 | 78.673372 | 0.679625 | 2.378687 | 12.450727 | 3.901047 | 1.699062 | 0.108740 | 0.108740 | Wed13Dec17 |
8056 | 6115041100 | 4941 | 0.1 | NaN | 95925.0 | 6115 | Yuba | rural | -0.218389 | 36.661106 | 63.338894 | No | 4.0 | 3.0 | -0.888546 | 17.169254 | 0.366317 | 70.627486 | -0.041317 | 41.473117 | -0.676661 | 23.777749 | 0.890610 | 98.601309 | 0.339119 | 67.150006 | 0.252748 | 84.242269 | -0.886599 | 15.128962 | 71.2 | 23.777749 | 96.1 | 60.772488 | 71.6 | 71.679713 | 97.7 | 77.839086 | 7.974980 | 62.530476 | ... | 12.613884 | 0.001909 | 1.642500 | 44.24206 | 20.325934 | 54.332115 | 99.255742 | 98.846387 | 92.364943 | 70.155791 | 54.433466 | 7.482993 | 78.365200 | 24.864865 | 54.702939 | 93.333333 | 2.412421 | 50.561798 | 3.438984 | 16.290091 | 25.766714 | 0.055122 | 23.970230 | 7.021225 | 90.414475 | 0.135172 | 99.692031 | 558.463539 | 41.370461 | 78.684399 | 24.881304 | 83.383930 | 0.526209 | 1.032180 | 6.354989 | 5.100182 | 3.238211 | 0.263105 | 0.101194 | Wed13Dec17 |
8057 rows × 91 columns
# view column data types
df_hpi_raw.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 8057 entries, 0 to 8056 Data columns (total 91 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CensusTract 8057 non-null int64 1 pop2010 8057 non-null int64 2 pct2010gq 8024 non-null float64 3 City 6972 non-null object 4 ZIP 8035 non-null float64 5 County_FIPS 8057 non-null int64 6 County_Name 8057 non-null object 7 UrbanType 8057 non-null object 8 hpi2score 7793 non-null float64 9 hpi2_pctile_pos 7793 non-null float64 10 hpi2_pctile_neg 7793 non-null float64 11 hpi_top25pct 7793 non-null object 12 quintiles 7793 non-null float64 13 quartiles 7793 non-null float64 14 economic 7793 non-null float64 15 economic_pctile 7793 non-null float64 16 education 7793 non-null float64 17 education_pctile 7793 non-null float64 18 housing 7793 non-null float64 19 houseing_pctile 7793 non-null float64 20 healthcareaccess 7793 non-null float64 21 healthcareaccess_pctile 7793 non-null float64 22 neighborhood 7793 non-null float64 23 neighborhood_pctile 7793 non-null float64 24 pollution 7793 non-null float64 25 pollution_pctile 7793 non-null float64 26 transportation 7793 non-null float64 27 transportation_pctile 7793 non-null float64 28 social 7793 non-null float64 29 social_pctile 7793 non-null float64 30 insured 7793 non-null float64 31 insured_pctile 7793 non-null float64 32 uncrowded 7793 non-null float64 33 uncrowded_pctile 7793 non-null float64 34 homeownership 7793 non-null float64 35 homeownership_pctile 7793 non-null float64 36 automobile 7793 non-null float64 37 automobile_pctile 7793 non-null float64 38 commute 7793 non-null float64 39 commute_pctile 7793 non-null float64 40 inpreschool 7793 non-null float64 41 inpreschool_pctile 7793 non-null float64 42 inhighschool 7793 non-null float64 43 inhighschool_pctile 7793 non-null float64 44 bachelorsed 7793 non-null float64 45 bachelorsed_pctile 7793 non-null float64 46 employed 7793 non-null float64 47 employed_pctile 7793 non-null float64 48 abovepoverty 7793 non-null float64 49 abovepoverty_pctile 7793 non-null float64 50 income 7793 non-null float64 51 income_pctile 7793 non-null float64 52 retail 7793 non-null float64 53 retail_pctile 7793 non-null float64 54 parkaccess 7793 non-null float64 55 parkaccess_pctile 7793 non-null float64 56 treecanopy 7793 non-null float64 57 treecanopy_pctile 7793 non-null float64 58 alcoffsale 7793 non-null float64 59 alcoffsale_pctile 7793 non-null float64 60 voting 7793 non-null float64 61 voting_pctile 7793 non-null float64 62 ownsevere 7793 non-null float64 63 ownsevere_pctile 7793 non-null float64 64 rentsevere 7793 non-null float64 65 rentsevere_pctile 7793 non-null float64 66 houserepair 7793 non-null float64 67 houserepair_pctile 7793 non-null float64 68 twoparents 7793 non-null float64 69 twoparents_pctile 7793 non-null float64 70 supermkts 7793 non-null float64 71 supermkts_pctile 7793 non-null float64 72 ozone 7793 non-null float64 73 ozone_pctile 7793 non-null float64 74 pm25 7793 non-null float64 75 pm25_pctile 7793 non-null float64 76 dieselpm 7793 non-null float64 77 dieselpm_pctile 7793 non-null float64 78 h20contam 7793 non-null float64 79 h20contam_pctile 7793 non-null float64 80 LEB 7793 non-null float64 81 LEB_pctile 7793 non-null float64 82 white_pct 8057 non-null float64 83 black_pct 8057 non-null float64 84 asian_pct 8057 non-null float64 85 latino_pct 8057 non-null float64 86 multiple_pct 8057 non-null float64 87 NativeAm_pct 8057 non-null float64 88 PacificIsl_pct 8057 non-null float64 89 other_pct 8057 non-null float64 90 version 8057 non-null object dtypes: float64(83), int64(3), object(5) memory usage: 5.6+ MB
# run basic stats
df_hpi_raw.describe()
CensusTract | pop2010 | pct2010gq | ZIP | County_FIPS | hpi2score | hpi2_pctile_pos | hpi2_pctile_neg | quintiles | quartiles | economic | economic_pctile | education | education_pctile | housing | houseing_pctile | healthcareaccess | healthcareaccess_pctile | neighborhood | neighborhood_pctile | pollution | pollution_pctile | transportation | transportation_pctile | social | social_pctile | insured | insured_pctile | uncrowded | uncrowded_pctile | homeownership | homeownership_pctile | automobile | automobile_pctile | commute | commute_pctile | inpreschool | inpreschool_pctile | inhighschool | inhighschool_pctile | ... | income | income_pctile | retail | retail_pctile | parkaccess | parkaccess_pctile | treecanopy | treecanopy_pctile | alcoffsale | alcoffsale_pctile | voting | voting_pctile | ownsevere | ownsevere_pctile | rentsevere | rentsevere_pctile | houserepair | houserepair_pctile | twoparents | twoparents_pctile | supermkts | supermkts_pctile | ozone | ozone_pctile | pm25 | pm25_pctile | dieselpm | dieselpm_pctile | h20contam | h20contam_pctile | LEB | LEB_pctile | white_pct | black_pct | asian_pct | latino_pct | multiple_pct | NativeAm_pct | PacificIsl_pct | other_pct | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 8.057000e+03 | 8057.000000 | 8024.000000 | 8035.000000 | 8057.000000 | 7.793000e+03 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7.793000e+03 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | ... | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 7793.000000 | 8057.000000 | 8057.000000 | 8057.000000 | 8057.000000 | 8057.000000 | 8057.000000 | 8057.000000 | 8057.000000 |
mean | 6.054898e+09 | 4623.799926 | 2.307515 | 92836.608214 | 6054.690952 | 2.438298e-12 | 50.006416 | 49.993584 | 3.000000 | 2.500064 | 5.646001e-12 | 50.006416 | 1.411520e-12 | 50.006111 | -1.283313e-12 | 50.006416 | 1.283205e-13 | 50.003169 | -1.411457e-12 | 50.006416 | 1.013758e-11 | 50.005838 | 2.309760e-12 | 50.006349 | 1.154779e-12 | 50.006416 | 79.408623 | 50.003169 | 90.698576 | 50.003103 | 54.337842 | 50.003304 | 92.098127 | 50.002863 | 9.654941 | 50.006042 | 52.267040 | 50.003388 | 97.785780 | 76.226906 | ... | 67385.515155 | 50.005502 | 1.949688 | 50.006416 | 74.356703 | 50.004012 | 8.126024 | 50.006414 | 54.182934 | 50.005445 | 67.711987 | 50.006416 | 13.323066 | 50.004000 | 26.288570 | 50.003986 | 98.647874 | 50.002347 | 76.420797 | 50.006085 | 49.747290 | 50.006416 | 0.047431 | 49.996880 | 10.392395 | 49.997292 | 19.225058 | 49.994518 | 472.535176 | 49.994858 | 80.905281 | 50.006416 | 41.399286 | 5.858864 | 12.423550 | 36.288136 | 2.599598 | 0.460792 | 0.332727 | 0.227466 |
std | 2.653142e+07 | 1984.696591 | 9.574740 | 3445.164575 | 26.609992 | 5.487532e-01 | 28.869366 | 28.869366 | 1.414123 | 1.118048 | 8.704671e-01 | 28.869366 | 6.825760e-01 | 28.869144 | 6.491728e-01 | 28.869366 | 1.000000e+00 | 28.869258 | 3.871541e-01 | 28.869366 | 6.334197e-01 | 28.869435 | 3.696637e-01 | 28.869398 | 8.297582e-01 | 28.869366 | 12.131079 | 28.869258 | 9.805062 | 28.865514 | 23.564124 | 28.869302 | 8.632851 | 28.868033 | 11.714788 | 28.869568 | 26.789801 | 28.860150 | 5.401372 | 38.755211 | ... | 32776.843403 | 28.869438 | 7.922636 | 28.869366 | 32.719631 | 28.107014 | 8.888843 | 28.869366 | 33.062726 | 28.855918 | 10.869562 | 28.869366 | 8.615520 | 28.869376 | 11.563002 | 28.869253 | 2.319737 | 28.040426 | 12.942265 | 28.869308 | 35.983316 | 28.845761 | 0.010272 | 28.794412 | 2.580386 | 28.766065 | 16.699284 | 28.868945 | 249.492410 | 28.867119 | 3.465141 | 28.869366 | 27.003564 | 9.384340 | 14.655063 | 26.290103 | 1.480804 | 1.416903 | 0.584286 | 0.235862 |
min | 6.001400e+09 | 0.000000 | 0.000000 | 32.000000 | 6001.000000 | -1.994633e+00 | 0.012832 | 0.000000 | 1.000000 | 1.000000 | -2.772752e+00 | 0.012832 | -4.727192e+00 | 0.012832 | -6.071828e+00 | 0.012832 | -4.551007e+00 | 0.012832 | -1.161717e+00 | 0.012832 | -3.431559e+00 | 0.012832 | -2.786433e+00 | 0.012832 | -3.421043e+00 | 0.012832 | 24.200000 | 0.012832 | 29.600000 | 0.012832 | 0.000000 | 0.153984 | 8.800000 | 0.012832 | 0.000000 | 1.039394 | 0.000000 | 1.873476 | 18.000000 | 0.012832 | ... | 11925.000000 | 0.012832 | 0.000053 | 0.012832 | 0.000000 | 2.194277 | 0.043034 | 0.012832 | 0.000000 | 4.516874 | 19.559846 | 0.012832 | 0.000000 | 0.076992 | 0.000000 | 0.012832 | 55.353075 | 0.012832 | 0.000000 | 0.025664 | 0.000000 | 2.399589 | 0.026421 | 0.885410 | 1.651081 | 0.064160 | 0.021181 | 0.000000 | 6.919810 | 0.000000 | 64.758363 | 0.012832 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 6.037265e+09 | 3350.000000 | 0.000000 | 91602.000000 | 6037.000000 | -4.093288e-01 | 25.009624 | 24.996792 | 2.000000 | 2.000000 | -6.476213e-01 | 25.009624 | -4.532697e-01 | 25.009624 | -3.768373e-01 | 25.009624 | -6.354442e-01 | 24.932632 | -2.246448e-01 | 25.009624 | -4.531551e-01 | 25.022456 | -1.729056e-01 | 25.009624 | -5.734184e-01 | 25.009624 | 71.700000 | 24.932632 | 86.500000 | 24.971128 | 36.700000 | 24.983960 | 89.800000 | 24.894136 | 2.826855 | 25.009624 | 32.600000 | 25.060952 | 98.400000 | 24.983960 | ... | 42795.000000 | 25.009624 | 0.258063 | 25.009624 | 54.457690 | 25.009624 | 3.610046 | 25.009624 | 25.986079 | 25.009624 | 59.957036 | 25.009624 | 8.040201 | 24.996792 | 18.493151 | 24.996792 | 98.134328 | 25.022456 | 68.817204 | 25.009624 | 15.403463 | 25.009624 | 0.040127 | 23.970230 | 8.697944 | 24.688823 | 9.026491 | 24.996792 | 249.617651 | 23.585269 | 78.697686 | 25.009624 | 16.402337 | 1.065163 | 2.831023 | 14.021286 | 1.561428 | 0.144196 | 0.064480 | 0.104759 |
50% | 6.059053e+09 | 4409.000000 | 0.300000 | 92691.000000 | 6059.000000 | 7.770569e-03 | 50.006416 | 49.993584 | 3.000000 | 3.000000 | 2.736376e-02 | 50.006416 | -5.511403e-02 | 50.006416 | 9.439020e-02 | 50.006416 | 1.723983e-01 | 49.967920 | 1.267941e-02 | 50.006416 | 7.561873e-02 | 50.006416 | 8.141831e-03 | 50.006416 | 4.910712e-02 | 50.006416 | 81.500000 | 49.967920 | 94.300000 | 50.160400 | 56.900000 | 50.032080 | 94.700000 | 50.173232 | 5.807478 | 50.006416 | 51.100000 | 50.044912 | 100.000000 | 100.000000 | ... | 60650.000000 | 50.006416 | 0.780438 | 50.006416 | 91.502280 | 50.006416 | 5.105407 | 50.006416 | 58.245119 | 50.006416 | 68.762440 | 50.006416 | 11.747851 | 50.006416 | 26.130653 | 50.006416 | 99.350649 | 49.980752 | 77.932961 | 50.006416 | 48.997602 | 50.006416 | 0.046178 | 53.278583 | 10.370000 | 52.380341 | 16.560000 | 49.993584 | 479.227886 | 49.980752 | 81.063097 | 50.006416 | 41.368078 | 2.517162 | 7.109079 | 28.490028 | 2.547771 | 0.267329 | 0.169147 | 0.188844 |
75% | 6.073016e+09 | 5651.000000 | 1.100000 | 94558.000000 | 6073.000000 | 4.103426e-01 | 75.003208 | 74.990376 | 4.000000 | 3.000000 | 6.556297e-01 | 75.003208 | 4.753215e-01 | 75.003208 | 4.548955e-01 | 75.003208 | 7.824017e-01 | 74.964712 | 2.002337e-01 | 75.003208 | 4.585913e-01 | 75.003208 | 1.527340e-01 | 75.003208 | 6.372380e-01 | 75.003208 | 88.900000 | 74.964712 | 97.800000 | 74.489927 | 73.500000 | 74.926216 | 97.400000 | 74.579751 | 11.592040 | 75.003208 | 71.700000 | 74.990376 | 100.000000 | 100.000000 | ... | 84611.000000 | 75.003208 | 1.787546 | 75.003208 | 100.000000 | 81.355062 | 8.637490 | 75.003208 | 83.753018 | 75.003208 | 76.004847 | 75.003208 | 16.748768 | 75.003208 | 33.834586 | 74.990376 | 100.000000 | 80.816117 | 85.926223 | 75.003208 | 84.675025 | 75.003208 | 0.055122 | 75.811626 | 12.050000 | 74.759399 | 24.609648 | 74.977544 | 664.069078 | 74.990376 | 83.236109 | 75.003208 | 65.422343 | 6.230256 | 15.683254 | 55.393401 | 3.517078 | 0.467175 | 0.379147 | 0.299670 |
max | 6.115041e+09 | 37452.000000 | 100.000000 | 96161.000000 | 6115.000000 | 1.532227e+00 | 100.000000 | 99.987168 | 5.000000 | 4.000000 | 2.453069e+00 | 100.000000 | 1.696938e+00 | 100.000000 | 1.363530e+00 | 100.000000 | 1.697407e+00 | 99.974336 | 8.350715e+00 | 100.000000 | 1.568937e+00 | 100.000000 | 2.053622e+00 | 100.000000 | 2.129654e+00 | 100.000000 | 100.000000 | 99.974336 | 100.000000 | 96.933145 | 98.700000 | 100.000000 | 100.000000 | 98.986270 | 92.712843 | 100.000000 | 100.000000 | 95.714102 | 100.000000 | 100.000000 | ... | 248750.000000 | 100.000000 | 325.342367 | 100.000000 | 100.000000 | 81.355062 | 74.268719 | 100.000000 | 100.000000 | 97.010137 | 94.715657 | 100.000000 | 100.000000 | 99.127422 | 80.000000 | 99.088926 | 100.000000 | 80.816117 | 100.000000 | 99.563711 | 100.000000 | 94.251251 | 0.067829 | 99.884512 | 19.600000 | 99.987168 | 253.730796 | 99.987168 | 1245.651010 | 99.987168 | 89.995435 | 100.000000 | 100.000000 | 89.759666 | 89.807290 | 100.000000 | 27.272727 | 78.526800 | 14.201342 | 7.797695 |
8 rows × 86 columns
df_hpi_raw.shape
(8057, 91)
# check unique values
df_hpi_raw['hpi2score'].unique()
array([ 1.1820283 , 1.3258738 , 0.9256657 , ..., -0.79174168, 0.16293131, -0.21838948])
# set display to max length of cols to see all counts
pd.set_option('display.max_rows', 91)
# check # missing values
hpi_missing_cols = df_hpi_raw.isnull().sum()
print(hpi_missing_cols.sort_values(ascending=False))
City 1085 bachelorsed_pctile 264 commute_pctile 264 insured_pctile 264 uncrowded 264 uncrowded_pctile 264 homeownership 264 homeownership_pctile 264 automobile 264 automobile_pctile 264 commute 264 inpreschool 264 social_pctile 264 inpreschool_pctile 264 inhighschool 264 inhighschool_pctile 264 bachelorsed 264 hpi2score 264 employed 264 employed_pctile 264 insured 264 social 264 abovepoverty_pctile 264 housing 264 hpi_top25pct 264 quintiles 264 quartiles 264 economic 264 economic_pctile 264 education 264 education_pctile 264 houseing_pctile 264 transportation_pctile 264 healthcareaccess 264 healthcareaccess_pctile 264 neighborhood 264 neighborhood_pctile 264 pollution 264 pollution_pctile 264 transportation 264 abovepoverty 264 income 264 hpi2_pctile_pos 264 houserepair_pctile 264 LEB_pctile 264 LEB 264 h20contam_pctile 264 h20contam 264 dieselpm_pctile 264 dieselpm 264 pm25_pctile 264 pm25 264 ozone_pctile 264 ozone 264 supermkts_pctile 264 supermkts 264 income_pctile 264 twoparents 264 twoparents_pctile 264 houserepair 264 alcoffsale 264 retail 264 retail_pctile 264 parkaccess 264 rentsevere_pctile 264 treecanopy 264 treecanopy_pctile 264 parkaccess_pctile 264 alcoffsale_pctile 264 voting 264 voting_pctile 264 ownsevere 264 ownsevere_pctile 264 rentsevere 264 hpi2_pctile_neg 264 pct2010gq 33 ZIP 22 UrbanType 0 County_Name 0 County_FIPS 0 pop2010 0 version 0 other_pct 0 white_pct 0 black_pct 0 asian_pct 0 latino_pct 0 multiple_pct 0 NativeAm_pct 0 PacificIsl_pct 0 CensusTract 0 dtype: int64
# check if any rows are missing more than 70%
seventy_percent = 8057 * .7
df_hpi_raw.columns[df_hpi_raw.isnull().sum() > seventy_percent]
Index([], dtype='object')
# view column names
hpi_raw_columns = df_hpi_raw.columns
print(hpi_raw_columns)
Index(['CensusTract', 'pop2010', 'pct2010gq', 'City', 'ZIP', 'County_FIPS', 'County_Name', 'UrbanType', 'hpi2score', 'hpi2_pctile_pos', 'hpi2_pctile_neg', 'hpi_top25pct', 'quintiles', 'quartiles', 'economic', 'economic_pctile', 'education', 'education_pctile', 'housing', 'houseing_pctile', 'healthcareaccess', 'healthcareaccess_pctile', 'neighborhood', 'neighborhood_pctile', 'pollution', 'pollution_pctile', 'transportation', 'transportation_pctile', 'social', 'social_pctile', 'insured', 'insured_pctile', 'uncrowded', 'uncrowded_pctile', 'homeownership', 'homeownership_pctile', 'automobile', 'automobile_pctile', 'commute', 'commute_pctile', 'inpreschool', 'inpreschool_pctile', 'inhighschool', 'inhighschool_pctile', 'bachelorsed', 'bachelorsed_pctile', 'employed', 'employed_pctile', 'abovepoverty', 'abovepoverty_pctile', 'income', 'income_pctile', 'retail', 'retail_pctile', 'parkaccess', 'parkaccess_pctile', 'treecanopy', 'treecanopy_pctile', 'alcoffsale', 'alcoffsale_pctile', 'voting', 'voting_pctile', 'ownsevere', 'ownsevere_pctile', 'rentsevere', 'rentsevere_pctile', 'houserepair', 'houserepair_pctile', 'twoparents', 'twoparents_pctile', 'supermkts', 'supermkts_pctile', 'ozone', 'ozone_pctile', 'pm25', 'pm25_pctile', 'dieselpm', 'dieselpm_pctile', 'h20contam', 'h20contam_pctile', 'LEB', 'LEB_pctile', 'white_pct', 'black_pct', 'asian_pct', 'latino_pct', 'multiple_pct', 'NativeAm_pct', 'PacificIsl_pct', 'other_pct', 'version'], dtype='object')
# drop columns we don't need
cols_to_keep = ['City', 'ZIP', 'County_FIPS', 'County_Name', 'hpi2_pctile_pos',
'hpi2_pctile_neg', 'hpi_top25pct', 'quintiles', 'quartiles',
'healthcareaccess', 'healthcareaccess_pctile']
cols_to_drop = ['UrbanType', 'economic',
'economic_pctile', 'education', 'education_pctile', 'housing',
'houseing_pctile', 'healthcareaccess', 'healthcareaccess_pctile',
'neighborhood', 'neighborhood_pctile', 'pollution', 'pollution_pctile',
'transportation', 'transportation_pctile', 'social', 'social_pctile',
'insured', 'insured_pctile', 'uncrowded', 'uncrowded_pctile',
'homeownership', 'homeownership_pctile', 'automobile',
'automobile_pctile', 'commute', 'commute_pctile', 'inpreschool',
'inpreschool_pctile', 'inhighschool', 'inhighschool_pctile',
'bachelorsed', 'bachelorsed_pctile', 'employed', 'employed_pctile',
'abovepoverty', 'abovepoverty_pctile', 'income', 'income_pctile',
'retail', 'retail_pctile', 'parkaccess', 'parkaccess_pctile',
'treecanopy', 'treecanopy_pctile', 'alcoffsale', 'alcoffsale_pctile',
'voting', 'voting_pctile', 'ownsevere', 'ownsevere_pctile',
'rentsevere', 'rentsevere_pctile', 'houserepair', 'houserepair_pctile',
'twoparents', 'twoparents_pctile', 'supermkts', 'supermkts_pctile',
'ozone', 'ozone_pctile', 'pm25', 'pm25_pctile', 'dieselpm',
'dieselpm_pctile', 'h20contam', 'h20contam_pctile', 'LEB', 'LEB_pctile',
'white_pct', 'black_pct', 'asian_pct', 'latino_pct', 'multiple_pct',
'NativeAm_pct', 'PacificIsl_pct', 'other_pct', 'version']
df_hpi = df_hpi_raw.drop([], axis=1)
No rows are missing more than 70 percent of values. The highest percent of missing values is the city column which is missing 13%.
# view missing zip code rows
df_hpi[df_hpi['ZIP'].isnull()]
CensusTract | pop2010 | pct2010gq | City | ZIP | County_FIPS | County_Name | UrbanType | hpi2score | hpi2_pctile_pos | hpi2_pctile_neg | hpi_top25pct | quintiles | quartiles | economic | economic_pctile | education | education_pctile | housing | houseing_pctile | healthcareaccess | healthcareaccess_pctile | neighborhood | neighborhood_pctile | pollution | pollution_pctile | transportation | transportation_pctile | social | social_pctile | insured | insured_pctile | uncrowded | uncrowded_pctile | homeownership | homeownership_pctile | automobile | automobile_pctile | commute | commute_pctile | ... | income_pctile | retail | retail_pctile | parkaccess | parkaccess_pctile | treecanopy | treecanopy_pctile | alcoffsale | alcoffsale_pctile | voting | voting_pctile | ownsevere | ownsevere_pctile | rentsevere | rentsevere_pctile | houserepair | houserepair_pctile | twoparents | twoparents_pctile | supermkts | supermkts_pctile | ozone | ozone_pctile | pm25 | pm25_pctile | dieselpm | dieselpm_pctile | h20contam | h20contam_pctile | LEB | LEB_pctile | white_pct | black_pct | asian_pct | latino_pct | multiple_pct | NativeAm_pct | PacificIsl_pct | other_pct | version | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
360 | 6001990000 | 0 | NaN | Fremont | NaN | 6001 | Alameda | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
644 | 6013990000 | 0 | NaN | NaN | NaN | 6013 | Contra Costa | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
652 | 6015990000 | 0 | NaN | NaN | NaN | 6015 | Del Norte | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
695 | 6017990000 | 0 | NaN | NaN | NaN | 6017 | El Dorado | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
931 | 6023990100 | 0 | NaN | NaN | NaN | 6023 | Humboldt | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3514 | 6037990100 | 0 | NaN | NaN | NaN | 6037 | Los Angeles | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3515 | 6037990200 | 0 | NaN | NaN | NaN | 6037 | Los Angeles | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3516 | 6037990300 | 0 | NaN | NaN | NaN | 6037 | Los Angeles | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3595 | 6041990100 | 0 | NaN | NaN | NaN | 6041 | Marin | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3622 | 6045990100 | 0 | NaN | NaN | NaN | 6045 | Mendocino | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
3772 | 6053990000 | 0 | NaN | NaN | NaN | 6053 | Monterey | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
4415 | 6059990100 | 0 | NaN | NaN | NaN | 6059 | Orange | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
4500 | 6061990000 | 0 | NaN | NaN | NaN | 6061 | Placer | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6285 | 6073990100 | 0 | NaN | San Diego | NaN | 6073 | San Diego | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6478 | 6075980401 | 0 | NaN | San Francisco | NaN | 6075 | San Francisco | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6482 | 6075990100 | 0 | NaN | NaN | NaN | 6075 | San Francisco | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6675 | 6079990000 | 0 | NaN | NaN | NaN | 6079 | San Luis Obispo | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6833 | 6081990100 | 0 | NaN | Menlo Park | NaN | 6081 | San Mateo | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
6923 | 6083990000 | 0 | NaN | NaN | NaN | 6083 | Santa Barbara | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
7348 | 6087990100 | 0 | NaN | NaN | NaN | 6087 | Santa Cruz | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
7607 | 6097990100 | 0 | NaN | NaN | NaN | 6097 | Sonoma | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
8001 | 6111990100 | 0 | NaN | NaN | NaN | 6111 | Ventura | no_data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | Wed13Dec17 |
22 rows × 91 columns
# check # missing values in HPI2 score column
df_hpi['hpi2score'].isnull().sum()
264
# create column percentage of covid deaths over total cases
ca_df_raw["case_fatality_rate"] = (ca_df_raw["covid_deaths"]/ca_df_raw["covid_total"])*100
ca_df_raw.head()
state | abbreviation | county | fips | t1_socioecon | t2_household | t3_minority | t4_housing | epidim | healthcare_sys | ccvi_score | long | lat | covid_total | covid_deaths | todays_date | covid_confirmed | covid_suspected | covid_total_hospitalized | all_hospital_beds | icu_covid_confirmed_patients | icu_suspected_covid_patients | icu_available_beds | population | covid_cases_per_100k | covid_deaths_per_100k | case_fatality_rate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
186 | California | CA | Alameda | 6001 | 0.184 | 0.028 | 0.971 | 0.604 | 0.093 | 0.022 | 0.097 | -121.892927 | 37.646294 | 79471 | 1175 | 2021-02-20 | 176 | 26 | 202 | 2481 | 58 | 1 | 78 | 1680480 | 4729.065505 | 69.920499 | 1.478527 |
187 | California | CA | Alpine | 6003 | 0.813 | 0.787 | 0.747 | 0.989 | 0.288 | 0.003 | 0.515 | -119.822359 | 38.596786 | 80 | 0 | 2021-02-20 | None | None | None | None | None | None | None | 1209 | 6617.038875 | 0.000000 | 0.000000 |
188 | California | CA | Amador | 6005 | 0.387 | 0.303 | 0.612 | 0.593 | 0.396 | 0.006 | 0.156 | -120.656960 | 38.445831 | 3427 | 39 | 2021-02-20 | 6 | 2 | 8 | 52 | 1 | 1 | 0 | 40446 | 8473.025763 | 96.424863 | 1.138022 |
189 | California | CA | Butte | 6007 | 0.607 | 0.327 | 0.776 | 0.758 | 0.057 | 0.111 | 0.235 | -121.600525 | 39.667278 | 10823 | 156 | 2021-02-20 | 11 | 0 | 11 | 451 | 2 | 0 | 11 | 196880 | 5497.257213 | 79.236083 | 1.441375 |
190 | California | CA | Calaveras | 6009 | 0.362 | 0.639 | 0.548 | 0.126 | 0.230 | 0.032 | 0.098 | -120.552913 | 38.205371 | 1898 | 25 | 2021-02-20 | 2 | 0 | 2 | 33 | 1 | 0 | 7 | 46319 | 4097.670502 | 53.973531 | 1.317176 |
# Create a TSA dataframe for total cases
tsa_total_cases = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
tsa_total_cases = tsa_total_cases.loc[df_covid_cc_US['Province_State'] == "California"]
# Grab the columns that we want
tsa_total_cases_clean = tsa_total_cases.iloc[:,10:-1]
# Clean Combined_Key Column
tsa_total_cases_clean['Combined_Key'] = tsa_total_cases_clean['Combined_Key'].apply(lambda x: x.split(",")[0])
# extract rows 'Out of CA', 'Unassigned'
tsa_total_cases_clean.drop(tsa_total_cases_clean.loc[tsa_total_cases_clean['Combined_Key']=="Out of CA"].index, inplace=True)
tsa_total_cases_clean.drop(tsa_total_cases_clean.loc[tsa_total_cases_clean['Combined_Key']=="Unassigned"].index, inplace=True)
tsa_total_cases_clean.rename(columns={"Combined_Key":"Date"}, inplace = True)
tsa_total_cases_clean = tsa_total_cases_clean.set_index("Date")
tsa_total_cases_clean = tsa_total_cases_clean.transpose()
# Change the index type to datetime
#tsa_total_cases_clean.index = pd.to_datetime(tsa_total_cases_clean.index)
# Add new date
tsa_total_cases_clean["New Date"] = tsa_total_cases_clean.index
tsa_total_cases_clean.tail()
Date | Alameda | Alpine | Amador | Butte | Calaveras | Colusa | Contra Costa | Del Norte | El Dorado | Fresno | Glenn | Humboldt | Imperial | Inyo | Kern | Kings | Lake | Lassen | Los Angeles | Madera | Marin | Mariposa | Mendocino | Merced | Modoc | Mono | Monterey | Napa | Nevada | Orange | Placer | Plumas | Riverside | Sacramento | San Benito | San Bernardino | San Diego | San Francisco | San Joaquin | San Luis Obispo | San Mateo | Santa Barbara | Santa Clara | Santa Cruz | Shasta | Sierra | Siskiyou | Solano | Sonoma | Stanislaus | Sutter | Tehama | Trinity | Tulare | Tuolumne | Ventura | Yolo | Yuba | New Date |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2/15/21 | 78714 | 79 | 3398 | 10717 | 1866 | 2069 | 60710 | 931 | 8855 | 93065 | 2184 | 3025 | 26696 | 1193 | 100434 | 21647 | 3030 | 5498 | 1168372 | 15089 | 12876 | 388 | 3723 | 27701 | 443 | 1203 | 41341 | 8826 | 3761 | 256889 | 19313 | 641 | 285414 | 90856 | 5641 | 282494 | 252943 | 33374 | 64969 | 18889 | 37859 | 30862 | 107619 | 14232 | 10720 | 97 | 1664 | 29251 | 27361 | 54325 | 8656 | 4930 | 321 | 46714 | 3800 | 75742 | 12399 | 5583 | 2/15/21 |
2/16/21 | 78883 | 79 | 3409 | 10723 | 1871 | 2086 | 60834 | 938 | 8973 | 93231 | 2185 | 3025 | 26779 | 1217 | 100586 | 21658 | 3030 | 5546 | 1169688 | 15108 | 12920 | 388 | 3732 | 28201 | 446 | 1203 | 41580 | 8837 | 3774 | 257589 | 19433 | 647 | 286590 | 91091 | 5649 | 282494 | 253641 | 33454 | 65359 | 19210 | 37969 | 30991 | 107769 | 14344 | 10810 | 97 | 1683 | 29580 | 27361 | 54484 | 8708 | 4930 | 336 | 46934 | 3853 | 75889 | 12412 | 5634 | 2/16/21 |
2/17/21 | 79016 | 79 | 3413 | 10748 | 1899 | 2110 | 60996 | 947 | 8982 | 93363 | 2189 | 3036 | 26797 | 1224 | 100849 | 21696 | 3041 | 5546 | 1171865 | 15120 | 12946 | 388 | 3743 | 28306 | 448 | 1205 | 41632 | 8854 | 3813 | 257816 | 19457 | 647 | 287063 | 91246 | 5659 | 282736 | 254180 | 33491 | 65659 | 19248 | 38041 | 31067 | 107980 | 14359 | 10810 | 98 | 1689 | 29643 | 27414 | 54631 | 8722 | 4930 | 339 | 47220 | 3857 | 76043 | 12457 | 5649 | 2/17/21 |
2/18/21 | 79092 | 79 | 3415 | 10770 | 1896 | 2110 | 60996 | 947 | 8982 | 93555 | 2189 | 3053 | 26817 | 1224 | 100849 | 21696 | 3057 | 5546 | 1174340 | 15135 | 12946 | 389 | 3743 | 28306 | 446 | 1204 | 41632 | 8865 | 3822 | 258182 | 19462 | 647 | 287200 | 91309 | 5661 | 283356 | 254990 | 33558 | 65659 | 19248 | 38124 | 31067 | 108255 | 14359 | 10834 | 98 | 1689 | 29690 | 27414 | 54728 | 8722 | 4930 | 339 | 47220 | 3857 | 76234 | 12506 | 5649 | 2/18/21 |
2/19/21 | 79297 | 80 | 3417 | 10791 | 1898 | 2117 | 61284 | 955 | 9017 | 93734 | 2195 | 3067 | 26840 | 1243 | 101370 | 21797 | 3066 | 5558 | 1176843 | 15175 | 12990 | 389 | 3769 | 28489 | 446 | 1206 | 41787 | 8877 | 3852 | 258567 | 19520 | 649 | 287534 | 91797 | 5697 | 283873 | 255802 | 33650 | 65891 | 19340 | 38229 | 31334 | 108462 | 14438 | 10868 | 99 | 1701 | 29766 | 27542 | 54887 | 8766 | 4983 | 340 | 47399 | 3870 | 76442 | 12549 | 5681 | 2/19/21 |
# Create a TSA dataframe for total deaths
tsa_total_death = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
tsa_total_death = tsa_total_death.loc[df_covid_deaths_US['Province_State'] == "California"]
# Grab the columns that we want
tsa_total_death_clean = tsa_total_death.iloc[:,10:-1]
tsa_total_death_clean.drop(columns = ["Population"],inplace = True, axis=1)
# Clean Combined_Key Column
tsa_total_death_clean['Combined_Key'] = tsa_total_death_clean['Combined_Key'].apply(lambda x: x.split(",")[0])
# # extract rows 'Out of CA', 'Unassigned'
tsa_total_death_clean.drop(tsa_total_death_clean.loc[tsa_total_death_clean['Combined_Key']=="Out of CA"].index, inplace=True)
tsa_total_death_clean.drop(tsa_total_death_clean.loc[tsa_total_death_clean['Combined_Key']=="Unassigned"].index, inplace=True)
tsa_total_death_clean.rename(columns={"Combined_Key":"Date"}, inplace = True)
tsa_total_death_clean = tsa_total_death_clean.set_index("Date")
tsa_total_death_clean = tsa_total_death_clean.transpose()
# Change the index type to datetime
#tsa_total_cases_clean.index = pd.to_datetime(tsa_total_cases_clean.index)
# Add new date
tsa_total_death_clean["New Date"] = tsa_total_death_clean.index
tsa_total_death_clean.tail()
Date | Alameda | Alpine | Amador | Butte | Calaveras | Colusa | Contra Costa | Del Norte | El Dorado | Fresno | Glenn | Humboldt | Imperial | Inyo | Kern | Kings | Lake | Lassen | Los Angeles | Madera | Marin | Mariposa | Mendocino | Merced | Modoc | Mono | Monterey | Napa | Nevada | Orange | Placer | Plumas | Riverside | Sacramento | San Benito | San Bernardino | San Diego | San Francisco | San Joaquin | San Luis Obispo | San Mateo | Santa Barbara | Santa Clara | Santa Cruz | Shasta | Sierra | Siskiyou | Solano | Sonoma | Stanislaus | Sutter | Tehama | Trinity | Tulare | Tuolumne | Ventura | Yolo | Yuba | New Date |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2/15/21 | 1121 | 0 | 38 | 149 | 25 | 11 | 622 | 4 | 90 | 1291 | 23 | 32 | 584 | 34 | 687 | 208 | 38 | 19 | 19095 | 189 | 187 | 7 | 40 | 375 | 4 | 4 | 307 | 62 | 74 | 3577 | 222 | 6 | 3524 | 1372 | 57 | 2418 | 3037 | 366 | 992 | 205 | 474 | 372 | 1668 | 168 | 170 | 0 | 13 | 140 | 287 | 899 | 93 | 51 | 5 | 691 | 52 | 732 | 172 | 36 | 2/15/21 |
2/16/21 | 1122 | 0 | 38 | 153 | 25 | 12 | 622 | 4 | 91 | 1341 | 23 | 32 | 584 | 34 | 687 | 209 | 39 | 19 | 19216 | 201 | 188 | 7 | 40 | 382 | 4 | 4 | 309 | 62 | 74 | 3617 | 223 | 6 | 3558 | 1382 | 57 | 2418 | 3042 | 372 | 1018 | 210 | 481 | 376 | 1669 | 170 | 171 | 0 | 13 | 149 | 291 | 905 | 93 | 51 | 5 | 697 | 55 | 751 | 172 | 36 | 2/16/21 |
2/17/21 | 1125 | 0 | 38 | 153 | 25 | 13 | 626 | 4 | 91 | 1349 | 23 | 32 | 584 | 34 | 690 | 213 | 39 | 19 | 19381 | 201 | 192 | 7 | 41 | 382 | 4 | 4 | 310 | 64 | 74 | 3644 | 223 | 6 | 3577 | 1402 | 57 | 2457 | 3099 | 376 | 1018 | 210 | 481 | 381 | 1682 | 170 | 171 | 0 | 13 | 156 | 291 | 910 | 93 | 51 | 5 | 706 | 55 | 767 | 181 | 36 | 2/17/21 |
2/18/21 | 1126 | 0 | 38 | 153 | 25 | 13 | 626 | 4 | 91 | 1349 | 23 | 32 | 584 | 34 | 690 | 213 | 39 | 19 | 19514 | 201 | 192 | 7 | 41 | 382 | 4 | 4 | 310 | 65 | 74 | 3685 | 229 | 6 | 3599 | 1402 | 57 | 2492 | 3135 | 376 | 1018 | 210 | 497 | 381 | 1708 | 170 | 171 | 0 | 13 | 156 | 291 | 916 | 93 | 51 | 5 | 706 | 55 | 767 | 181 | 36 | 2/18/21 |
2/19/21 | 1171 | 0 | 39 | 156 | 25 | 13 | 643 | 5 | 93 | 1367 | 23 | 32 | 584 | 34 | 741 | 213 | 40 | 19 | 19670 | 201 | 194 | 7 | 42 | 388 | 4 | 4 | 315 | 67 | 74 | 3726 | 229 | 6 | 3633 | 1431 | 57 | 2522 | 3169 | 387 | 1026 | 217 | 497 | 392 | 1725 | 172 | 172 | 0 | 13 | 161 | 291 | 921 | 94 | 51 | 5 | 720 | 56 | 782 | 181 | 36 | 2/19/21 |
# TSA Total Cases
import plotly.graph_objects as go
df = tsa_total_cases_clean
# plotly figure setup
fig = go.Figure()
# one trace for each df column
fig.add_trace(go.Scatter(name="Selection 1",x=df["New Date"].values, y=df["Alameda"].values))
# one trace for each df column
fig.add_trace(go.Scatter(name="Selection 2",x=df["New Date"].values, y=df["Alameda"].values))
# one button for each df column
updatemenu= []
buttons=[]
# add second buttons
buttons2 = []
for i in list(df.columns[:-1]):
buttons.append(dict(method='restyle',label = str(i),args=[{'x':[df["New Date"].values],'y':[df[i].values]},[0]])
)
for i in list(df.columns[:-1]):
buttons2.append(dict(method='restyle',label = str(i),args=[{'x':[df["New Date"].values],'y':[df[i].values]},[1]])
)
# some adjustments to the updatemenus
button_layer_1_height = 1.25
updatemenu = list([dict(
buttons = buttons,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.1,
xanchor="left",
y= button_layer_1_height,
yanchor="top",
font = dict(color = "blue")
),dict(
buttons = buttons2,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.37,
xanchor="left",
y=button_layer_1_height,
yanchor="top", font = dict(color = "red"))])
fig.update_layout(showlegend=True, updatemenus=updatemenu, title_text = "Covid19 Cases")
fig.update_xaxes(categoryorder= 'array', categoryarray= df.index)
fig.show()
# TSA Total Death
import plotly.graph_objects as go
df1 = tsa_total_death_clean
# plotly figure setup
fig = go.Figure()
# one trace for each df column
fig.add_trace(go.Scatter(name="Selection 1",x=df1["New Date"].values, y=df1["Alameda"].values))
# one trace for each df column
fig.add_trace(go.Scatter(name="Selection 2",x=df1["New Date"].values, y=df1["Alameda"].values))
# one button for each df column
updatemenu= []
buttons=[]
# add second buttons
buttons2 = []
for i in list(df.columns[:-1]):
buttons.append(dict(method='restyle',label = str(i),args=[{'x':[df1["New Date"].values],'y':[df1[i].values]},[0]])
)
for i in list(df.columns[:-1]):
buttons2.append(dict(method='restyle',label = str(i),args=[{'x':[df1["New Date"].values],'y':[df1[i].values]},[1]])
)
# some adjustments to the updatemenus
button_layer_1_height = 1.25
updatemenu = list([dict(
buttons = buttons,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.1,
xanchor="left",
y= button_layer_1_height,
yanchor="top",
font = dict(color = "blue")
),dict(
buttons = buttons2,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.37,
xanchor="left",
y=button_layer_1_height,
yanchor="top", font = dict(color = "red"))])
fig.update_layout(showlegend=True, updatemenus=updatemenu, title_text = "Covid19 Deaths")
fig.update_xaxes(categoryorder= 'array', categoryarray= df1.index)
fig.show()
import plotly.graph_objects as go
df = ca_df_raw
date_of_data = str(ca_df_raw.iloc[0,15].date())
# Drop rows that are not relevant for the plot.
df = df.drop(['ccvi_score','state', 'abbreviation','fips', 't1_socioecon',
't2_household', 't3_minority', 't4_housing', 'epidim', 'healthcare_sys',
'long', 'lat', 'population',"todays_date" ],axis=1)
# plotly figure setup
fig = go.Figure()
# one trace for each df column
fig.add_trace(go.Bar(name="Selection 1",x=df['county'].values, y=df["covid_total"].values))
# one trace for each df column
fig.add_trace(go.Bar(name="Selection 2",x=df['county'].values, y=df["covid_total"].values))
# one button for each df column
updatemenu= []
buttons=[]
# add second buttons
buttons2 = []
for i in list(df.columns):
buttons.append(dict(method='restyle',label = str(i),args=[{'x':[df["county"].values],'y':[df[i].values]},[0]])
)
for i in list(df.columns):
buttons2.append(dict(method='restyle',label = str(i),args=[{'x':[df["county"].values],'y':[df[i].values]},[1]])
)
# some adjustments to the updatemenus
button_layer_1_height = 1.23
updatemenu = list([dict(
buttons = buttons,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.1,
xanchor="left",
y= button_layer_1_height,
yanchor="top",
font = dict(color = "blue")
),dict(
buttons = buttons2,
direction="down",
pad={"r":10,"t":10},
showactive=True,
x= 0.37,
xanchor="left",
y=button_layer_1_height,
yanchor="top",font = dict(color = "red"))])
fig.update_layout(showlegend=True, updatemenus=updatemenu, title_text = date_of_data)
fig.show()
import folium
import copy
from folium.plugins import MarkerCluster
# We take a copy of the original data frame
df_geo = copy.deepcopy(ca_df_raw)
# swap columns long and lat to lat and long.
columns_titles = ['state', 'abbreviation', 'county', 'fips', 't1_socioecon',
't2_household', 't3_minority', 't4_housing', 'epidim', 'healthcare_sys',
'ccvi_score', 'lat','long', 'covid_total', 'covid_deaths',
'todays_date', 'covid_confirmed', 'covid_suspected',
'covid_total_hospitalized', 'all_hospital_beds',
'icu_covid_confirmed_patients', 'icu_suspected_covid_patients',
'icu_available_beds', 'population', 'covid_cases_per_100k',
'covid_deaths_per_100k']
df_geo = df_geo.reindex(columns=columns_titles)
df_geo.head()
# We create a list for each row in the dataframe.
geo_lst = df_geo.values.tolist()
# We create an empty list.
long_lat_lst = []
#We loop through the list and only append the long and lat values.
for i in geo_lst:
long_lat_lst.append((i[12],i[11]))
# we draw the folium map
map = folium.Map(location=[36.17, -119.7462],zoom_start = 6)
for j in range(len(long_lat_lst)):
# We grab the infos that we want to add to the popup.
county_name = df_geo.iloc[j, df_geo.columns.get_loc("county")]
ccvi_score = df_geo.iloc[j, df_geo.columns.get_loc("ccvi_score")]
covid_cases_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_cases_per_100k")]
covid_deaths_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths_per_100k")]
covid_deaths_per100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths")]
popup_text = "County: {county_name} <br>""CCVI Score: {ccvi_score} <br>""Covid Total Per 100K: {covid_cases_per_100k} <br>""Covid Deaths Per 100K: {covid_deaths_per_100k} <br>".format(county_name=county_name,ccvi_score=ccvi_score, covid_cases_per_100k =covid_cases_per_100k, covid_deaths_per_100k = covid_deaths_per_100k)
# We create the circles in the map.
folium.Circle(
radius=df.iloc[j]['covid_deaths_per_100k'] * 200,
weight=1, # thickness of the border
color='red', # this is the color of the border
opacity=0.3, # this is the alpha for the border
fill_color='red', # fill is inside the circle
fill_opacity=0.1, # we will make that less opaque so we can see layers
location = [long_lat_lst[j][1],long_lat_lst[j][0]],
popup = folium.Popup(popup_text,max_width=300),
fill = True).add_to(map)
map
# chloropleth map prep
# check the data types
df_geo.dtypes
state object abbreviation object county object fips int64 t1_socioecon float64 t2_household float64 t3_minority float64 t4_housing float64 epidim float64 healthcare_sys float64 ccvi_score float64 lat float64 long float64 covid_total int64 covid_deaths int64 todays_date datetime64[ns] covid_confirmed object covid_suspected object covid_total_hospitalized object all_hospital_beds object icu_covid_confirmed_patients object icu_suspected_covid_patients object icu_available_beds object population int64 covid_cases_per_100k float64 covid_deaths_per_100k float64 dtype: object
We have objects that contain numeric data:
numerical_cols = ['covid_confirmed', 'covid_suspected',
'covid_total_hospitalized', 'all_hospital_beds', 'icu_covid_confirmed_patients',
'icu_suspected_covid_patients', 'icu_available_beds']
# convert these to floats using the pandas.Series.astype function
for column in numerical_cols:
df_geo[column] = df_geo[column].astype(float)
# check what directory we're in
!ls
sample_data
!cd sample_data
Folium is a python library that allows you to create multiple types of Leaflet maps. To create a choropleth map, Folium requires a Geo JSON file, including geospatial data of the region. For a choropleth map of CA counties, we need a Geo JSON file that defines the boundaries of the counties. You can download the CA counties file from this link: https://github.com/codeforamerica/click_that_hood/blob/master/public/data/california-counties.geojson
To bind the data frame & json file, the name of the county in our data frame must exactly match the name in the json file. Let's check the names in the json file.
import json
# path to geojson map data for CA counties
CA_COUNTIES_GEO_FILEPATH = 'sample_data/ca-counties.geojson.txt' # 'https://storage.googleapis.com/map-trace/ca-counties.geojson.txt'
# open json file. json.load() methods returns a python dictionary
with open('sample_data/ca-counties.geojson.txt') as counties_file:
# with open(CA_COUNTIES_GEO_FILEPATH) as counties_file:
counties_json = json.load(counties_file)
# loop through dictionary to obtain name of counties in the json file
names_json = []
for index in range(len(counties_json['features'])):
names_json.append(counties_json['features'][index]['properties']['name'])
names_json
sorted(names_json)
# remove quotes
for name in names_json:
name = name.replace("'","")
df_geo.county
186 Alameda 187 Alpine 188 Amador 189 Butte 190 Calaveras 191 Colusa 192 Contra Costa 193 Del Norte 194 El Dorado 195 Fresno 196 Glenn 197 Humboldt 198 Imperial 199 Inyo 200 Kern 201 Kings 202 Lake 203 Lassen 204 Los Angeles 205 Madera 206 Marin 207 Mariposa 208 Mendocino 209 Merced 210 Modoc 211 Mono 212 Monterey 213 Napa 214 Nevada 215 Orange 216 Placer 217 Plumas 218 Riverside 219 Sacramento 220 San Benito 221 San Bernardino 222 San Diego 223 San Francisco 224 San Joaquin 225 San Luis Obispo 226 San Mateo 227 Santa Barbara 228 Santa Clara 229 Santa Cruz 230 Shasta 231 Sierra 232 Siskiyou 233 Solano 234 Sonoma 235 Stanislaus 236 Sutter 237 Tehama 238 Trinity 239 Tulare 240 Tuolumne 241 Ventura 242 Yolo 243 Yuba Name: county, dtype: object
# make a list of names in the data frame
df_names = df_geo.county.tolist()
# names in the json file - the same order as in the data frame
geojson_names = ['Alameda',
'Alpine',
'Amador',
'Butte',
'Calaveras',
'Colusa',
'Contra Costa',
'Del Norte',
'El Dorado',
'Fresno',
'Glenn',
'Humboldt',
'Imperial',
'Inyo',
'Kern',
'Kings',
'Lake',
'Lassen',
'Los Angeles',
'Madera',
'Marin',
'Mariposa',
'Mendocino',
'Merced',
'Modoc',
'Mono',
'Monterey',
'Napa',
'Nevada',
'Orange',
'Placer',
'Plumas',
'Riverside',
'Sacramento',
'San Benito',
'San Bernardino',
'San Diego',
'San Francisco',
'San Joaquin',
'San Luis Obispo',
'San Mateo',
'Santa Barbara',
'Santa Clara',
'Santa Cruz',
'Shasta',
'Sierra',
'Siskiyou',
'Solano',
'Sonoma',
'Stanislaus',
'Sutter',
'Tehama',
'Trinity',
'Tulare',
'Tuolumne',
'Ventura',
'Yolo',
'Yuba']
# replace data frame names by json names
df_geo.replace(dict(zip(df_names, geojson_names)), inplace=True)
df_geo.county
186 Alameda 187 Alpine 188 Amador 189 Butte 190 Calaveras 191 Colusa 192 Contra Costa 193 Del Norte 194 El Dorado 195 Fresno 196 Glenn 197 Humboldt 198 Imperial 199 Inyo 200 Kern 201 Kings 202 Lake 203 Lassen 204 Los Angeles 205 Madera 206 Marin 207 Mariposa 208 Mendocino 209 Merced 210 Modoc 211 Mono 212 Monterey 213 Napa 214 Nevada 215 Orange 216 Placer 217 Plumas 218 Riverside 219 Sacramento 220 San Benito 221 San Bernardino 222 San Diego 223 San Francisco 224 San Joaquin 225 San Luis Obispo 226 San Mateo 227 Santa Barbara 228 Santa Clara 229 Santa Cruz 230 Shasta 231 Sierra 232 Siskiyou 233 Solano 234 Sonoma 235 Stanislaus 236 Sutter 237 Tehama 238 Trinity 239 Tulare 240 Tuolumne 241 Ventura 242 Yolo 243 Yuba Name: county, dtype: object
# draw the folium map for covid death rates in CA counties
counties_map = folium.Map(location=[36.17, -119.7462],zoom_start = 6, tiles = 'Stamen Terrain')#'Stamen Toner')
folium.Choropleth(
geo_data = CA_COUNTIES_GEO_FILEPATH,
data = df_geo,
columns = ['county', 'covid_deaths_per_100k'],
key_on = 'feature.properties.name',
fill_color = 'RdPu',
fill_opacity = 1,
line_opacity = 1,
legend_name='CA County Covid Deaths per 100k Population',
smooth_factor = 0).add_to(counties_map)
for j in range(len(long_lat_lst)):
# We grab the infos that we want to add to the popup.
county_name = df_geo.iloc[j, df_geo.columns.get_loc("county")]
ccvi_score = df_geo.iloc[j, df_geo.columns.get_loc("ccvi_score")]
covid_cases_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_cases_per_100k")]
covid_deaths_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths_per_100k")]
covid_deaths_per100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths")]
popup_text = "County: {county_name} <br>""CCVI Score: {ccvi_score} <br>""Covid Total Per 100K: {covid_cases_per_100k} <br>""Covid Deaths Per 100K: {covid_deaths_per_100k} <br>".format(county_name=county_name,ccvi_score=ccvi_score, covid_cases_per_100k =covid_cases_per_100k, covid_deaths_per_100k = covid_deaths_per_100k)
"""
#We create the circles in the map.
folium.Circle(
radius=100,
weight=2,
color='black',
location = [long_lat_lst[j][1],long_lat_lst[j][0]],
popup = folium.Popup(popup_text,max_width=300),
fill = True).add_to(counties_map)
"""
#display map
counties_map
# draw the folium map for CCVI vulnerability rates in CA counties
counties_map = folium.Map(location=[36.17, -119.7462],zoom_start = 6, tiles ='Stamen Terrain') # 'Stamen Toner')
folium.Choropleth(
geo_data = CA_COUNTIES_GEO_FILEPATH,
data = df_geo,
columns = ['county', 'ccvi_score'],
key_on = 'feature.properties.name',
fill_color = 'RdPu',
fill_opacity = 1,
line_opacity = 1,
legend_name='CA County Covid Vulnerability (CCVI score)',
smooth_factor = 0).add_to(counties_map)
for j in range(len(long_lat_lst)):
# Grab info to add to the popup.
county_name = df_geo.iloc[j, df_geo.columns.get_loc("county")]
ccvi_score = df_geo.iloc[j, df_geo.columns.get_loc("ccvi_score")]
covid_cases_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_cases_per_100k")]
covid_deaths_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths_per_100k")]
covid_deaths_per100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths")]
popup_text = "County: {county_name} <br>""CCVI Score: {ccvi_score} <br>""Covid Total Per 100K: {covid_cases_per_100k} <br>""Covid Deaths Per 100K: {covid_deaths_per_100k} <br>".format(county_name=county_name,ccvi_score=ccvi_score, covid_cases_per_100k =covid_cases_per_100k, covid_deaths_per_100k = covid_deaths_per_100k)
"""
# Create circles in the map.
folium.Circle(
radius=100,
weight=2,
color='black',
location = [long_lat_lst[j][1],long_lat_lst[j][0]],
popup = folium.Popup(popup_text,max_width=300),
fill = True).add_to(counties_map)
"""
#display map
counties_map
# draw the folium map for Covid Case rates in CA counties
counties_map = folium.Map(location=[36.17, -119.7462],zoom_start = 6, tiles ='Stamen Terrain') # 'Stamen Toner')
folium.Choropleth(
geo_data = CA_COUNTIES_GEO_FILEPATH,
data = df_geo,
columns = ['county', 'covid_cases_per_100k'],
key_on = 'feature.properties.name',
fill_color = 'RdPu',
fill_opacity = 1,
line_opacity = 1,
legend_name='CA County Covid Cases per 100k',
smooth_factor = 0).add_to(counties_map)
for j in range(len(long_lat_lst)):
# Grab info to add to the popup.
county_name = df_geo.iloc[j, df_geo.columns.get_loc("county")]
ccvi_score = df_geo.iloc[j, df_geo.columns.get_loc("ccvi_score")]
covid_cases_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_cases_per_100k")]
covid_deaths_per_100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths_per_100k")]
covid_deaths_per100k = df_geo.iloc[j, df_geo.columns.get_loc("covid_deaths")]
popup_text = "County: {county_name} <br>""CCVI Score: {ccvi_score} <br>""Covid Total Per 100K: {covid_cases_per_100k} <br>""Covid Deaths Per 100K: {covid_deaths_per_100k} <br>".format(county_name=county_name,ccvi_score=ccvi_score, covid_cases_per_100k =covid_cases_per_100k, covid_deaths_per_100k = covid_deaths_per_100k)
# Create circles in the map.
folium.Circle(
radius=100,
weight=2,
color='black',
location = [long_lat_lst[j][1],long_lat_lst[j][0]],
popup = folium.Popup(popup_text,max_width=300),
fill = True).add_to(counties_map)
#display map
counties_map
import seaborn as sns # for visuals
import matplotlib.pyplot as plt # for graphs
# how graphs are printed
%matplotlib inline
plt.style.use('ggplot')
# Plot Covid Vulnerability by County
# order plot by ccvi score instead of default alphabetical county name
plot_order = ca_df_raw.sort_values(by='ccvi_score', ascending=False).county.values
# plot
ax = sns.barplot(x='ccvi_score', y='county', data=ca_df_raw, order=plot_order)#, palette='RdYlGn')
fig = plt.gcf()
fig.set_size_inches(5, 10)
plt.xlabel("Covid Vulnerability (CCVI Score)")
plt.ylabel("County")
plt.title("Covid Vulnerability by County in CA")
# plt.savefig("ccvi_by_county.png", format='png',dpi=150)
Text(0.5, 1.0, 'Covid Vulnerability by County in CA')
# Plot Covid Total Deaths by County
plot_order = ca_df_raw.sort_values(by='covid_total', ascending=False).county.values
ax = sns.barplot(x='covid_total', y='county', data=ca_df_raw, order=plot_order, palette='RdYlGn')
fig = plt.gcf()
fig.set_size_inches(5, 10)
plt.xlabel("Covid Total Deaths")
plt.ylabel("County")
plt.title("Covid Total Deaths by County in CA")
Text(0.5, 1.0, 'Covid Total Deaths by County in CA')
# Plot Covid Total Deaths by County
plot_order = ca_df_raw.sort_values(by='covid_deaths_per_100k', ascending=False).county.values
ax = sns.barplot(x='covid_deaths_per_100k', y='county', data=ca_df_raw, order=plot_order, palette='RdYlGn')
fig = plt.gcf()
fig.set_size_inches(5, 10)
plt.xlabel("Covid Deaths by Population (100k)")
plt.ylabel("County")
plt.title("Covid Deaths in CA Normalized by Population")
Text(0.5, 1.0, 'Covid Deaths in CA Normalized by Population')
%%time
# Install Libraries
# # Important library for many geopython libraries
# !apt install gdal-bin python-gdal python3-gdal
# # Install rtree - Geopandas requirment
# !apt install python3-rtree
# # Install Geopandas
# !pip install git+git://github.com/geopandas/geopandas.git
# # Install descartes - Geopandas requirment
# !pip install descartes
import geopandas as gpd
from branca.element import Template, MacroElement
CPU times: user 38 µs, sys: 2 µs, total: 40 µs Wall time: 46 µs
geoJSON_df = gpd.read_file(CA_COUNTIES_GEO_FILEPATH)
geoJSON_df.head()
name | cartodb_id | created_at | updated_at | geometry | |
---|---|---|---|---|---|
0 | Alameda | 1 | 2015-07-04T21:04:58+00:00 | 2015-07-04T21:04:58+00:00 | MULTIPOLYGON (((-122.31293 37.89733, -122.2884... |
1 | Alpine | 2 | 2015-07-04T21:04:58+00:00 | 2015-07-04T21:04:58+00:00 | POLYGON ((-120.07239 38.70277, -119.96495 38.7... |
2 | Amador | 3 | 2015-07-04T21:04:58+00:00 | 2015-07-04T21:04:58+00:00 | POLYGON ((-121.02726 38.48925, -121.02741 38.5... |
3 | Butte | 4 | 2015-07-04T21:04:58+00:00 | 2015-07-04T21:04:58+00:00 | POLYGON ((-121.87925 39.30361, -121.90831 39.3... |
4 | Calaveras | 5 | 2015-07-04T21:04:58+00:00 | 2015-07-04T21:04:58+00:00 | POLYGON ((-120.87605 38.02889, -120.91875 38.0... |
# Make a copy of the data df_geo dataframe
df_geo_copy = df_geo
# Rename the county column to name so we can merge the dataframes using geopandas and merge on name column
df_geo_copy = df_geo_copy.rename({'county': 'name'}, axis=1)
final_df_geo = geoJSON_df.merge(df_geo_copy, on = "name")
# Drop columns cartodb_id, created_at and updated_at
final_df_geo.drop(["cartodb_id","created_at","updated_at",'state', 'abbreviation', 'fips', 't1_socioecon','t2_household', 't3_minority', 't4_housing', 'epidim', 'healthcare_sys','lat', 'long', 'todays_date', 'covid_confirmed', 'covid_suspected','all_hospital_beds','icu_suspected_covid_patients','population'], axis=1, inplace=True)
#['state', 'abbreviation', 'fips', 't1_socioecon','t2_household', 't3_minority', 't4_housing', 'epidim', 'healthcare_sys','lat', 'long', 'todays_date', 'covid_confirmed', 'covid_suspected','all_hospital_beds','icu_suspected_covid_patients','population']
# round the covid_cases_per_100k and covid_deaths_per_100k
final_df_geo.covid_cases_per_100k = final_df_geo.covid_cases_per_100k.round()
final_df_geo.covid_deaths_per_100k = final_df_geo.covid_deaths_per_100k.round()
# round ccvi_socre to three decimals
final_df_geo.ccvi_score = final_df_geo.ccvi_score.round(3)
final_df_geo.head()
name | geometry | ccvi_score | covid_total | covid_deaths | covid_total_hospitalized | icu_covid_confirmed_patients | icu_available_beds | covid_cases_per_100k | covid_deaths_per_100k | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Alameda | MULTIPOLYGON (((-122.31293 37.89733, -122.2884... | 0.097 | 79471 | 1175 | 202.0 | 58.0 | 78.0 | 4729.0 | 70.0 |
1 | Alpine | POLYGON ((-120.07239 38.70277, -119.96495 38.7... | 0.515 | 80 | 0 | NaN | NaN | NaN | 6617.0 | 0.0 |
2 | Amador | POLYGON ((-121.02726 38.48925, -121.02741 38.5... | 0.156 | 3427 | 39 | 8.0 | 1.0 | 0.0 | 8473.0 | 96.0 |
3 | Butte | POLYGON ((-121.87925 39.30361, -121.90831 39.3... | 0.235 | 10823 | 156 | 11.0 | 2.0 | 11.0 | 5497.0 | 79.0 |
4 | Calaveras | POLYGON ((-120.87605 38.02889, -120.91875 38.0... | 0.098 | 1898 | 25 | 2.0 | 1.0 | 7.0 | 4098.0 | 54.0 |
# Here I add some javascript for the legend
# This template below allows us to have a legend on our folium map.
template = """
{% macro html(this, kwargs) %}
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>jQuery UI Draggable - Default functionality</title>
<link rel="stylesheet" href="//code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
<script src="https://code.jquery.com/jquery-1.12.4.js"></script>
<script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
<script>
$( function() {
$( "#maplegend" ).draggable({
start: function (event, ui) {
$(this).css({
right: "auto",
top: "auto",
bottom: "auto"
});
}
});
});
</script>
</head>
<body>
<div id='maplegend' class='maplegend'
style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
<div class='legend-title'>Legend (draggable!)</div>
<div class='legend-scale'>
<ul class='legend-labels'>
<li><span style='background:#a9a9a9;opacity:0.7;'></span>Missing Data - NaN value</li>
</ul>
</div>
</div>
</body>
</html>
<style type='text/css'>
.maplegend .legend-title {
text-align: left;
margin-bottom: 5px;
font-weight: bold;
font-size: 90%;
}
.maplegend .legend-scale ul {
margin: 0;
margin-bottom: 5px;
padding: 0;
float: left;
list-style: none;
}
.maplegend .legend-scale ul li {
font-size: 80%;
list-style: none;
margin-left: 0;
line-height: 18px;
margin-bottom: 2px;
}
.maplegend ul.legend-labels li span {
display: block;
float: left;
height: 16px;
width: 30px;
margin-right: 5px;
margin-left: 0;
border: 1px solid #999;
}
.maplegend .legend-source {
font-size: 80%;
color: #777;
clear: both;
}
.maplegend a {
color: #777;
}
</style>
{% endmacro %}"""
# One Folium Plot
lst = ['ccvi_score', 'covid_total', 'covid_deaths','covid_total_hospitalized', 'icu_covid_confirmed_patients', 'icu_available_beds', 'covid_cases_per_100k','covid_deaths_per_100k']
m = folium.Map(location=[36.17, -119.7462],zoom_start = 6, tiles =None)
for i in lst:
folium.Choropleth(
geo_data=final_df_geo,
data=final_df_geo,
columns=['name',i],
key_on="feature.properties.name",
fill_color='RdPu',
fill_opacity=1,
line_opacity=0.2,
legend_name=i,
smooth_factor=0,
Highlight= True,
name = i,
show=False,
overlay=True,
nan_fill_color= "#a9a9a9"
).add_to(m)
# We add the hover functionality to display data.
style_function = lambda x: {'fillColor': '#ffffff',
'color':'#000000',
'fillOpacity': 0.1,
'weight': 0.1}
highlight_function = lambda x: {'fillColor': '#000000',
'color':'#000000',
'fillOpacity': 0.50,
'weight': 0.1}
NIL = folium.features.GeoJson(
final_df_geo,
style_function=style_function,
control=True,
overlay = False,
highlight_function=highlight_function,
name = "California Map",
tooltip=folium.features.GeoJsonTooltip(
fields=['name','ccvi_score', 'covid_total', 'covid_deaths','covid_total_hospitalized', 'icu_covid_confirmed_patients', 'icu_available_beds', 'covid_cases_per_100k','covid_deaths_per_100k'],
aliases=['name','ccvi_score', 'covid_total', 'covid_deaths','covid_total_hospitalized', 'icu_covid_confirmed_patients', 'icu_available_beds', 'covid_cases_per_100k','covid_deaths_per_100k'],
style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
)
)
m.add_child(NIL)
m.keep_in_front(NIL)
folium.TileLayer('Stamen Terrain',name="light mode",control=True).add_to(m)
folium.TileLayer('Stamen Toner',name="dark mode",control=True).add_to(m)
folium.LayerControl(collapsed=False).add_to(m)
macro = MacroElement()
macro._template = Template(template)
m.get_root().add_child(macro)
m