A data analysis for a story in the L.A. Times, by Matt Stiles & Luke Money.
Questions? matt.stiles@latimes.com
%load_ext lab_black
import pandas as pd
import geopandas as gpd
import glob
import path
import os
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
path = "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/"
all_files = glob.glob(os.path.join(path, "*.csv"))
df_from_each_file = (
pd.read_csv(f, encoding="ISO-8859-1", low_memory=False) for f in all_files
)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
df = concatenated_df.copy()
df = df[
(df.vaccinations.str.strip() != "Unreliable Data")
& (df.pop_2019.str.strip() != "No Pop Data")
& (df.vaccinations_pct.str.strip() != "Unreliable Data")
& (df.vaccinations_pct.str.strip() != "No Pop Data")
& (df.vaccinations.str.strip() != "<5")
]
df["area"] = df["area"].str.strip(" ").str.replace("City of ", "", regex=False)
df["week"] = pd.to_datetime(df["update_date"]).dt.week
df.sort_values("update_date", ascending=False).head(10)
area | vaccinations | pop_2019 | vaccinations_pct | update_date | week | |
---|---|---|---|---|---|---|
4259 | West Covina | 38583 | 88873 | 43.4 | 2021-04-12 | 15 |
4406 | Unincorporated - Angeles National Forest | 296 | 1096 | 27.0 | 2021-04-12 | 15 |
4404 | Unincorporated - Altadena | 17205 | 36812 | 46.7 | 2021-04-12 | 15 |
4403 | Unincorporated - Agua Dulce | 958 | 3736 | 25.6 | 2021-04-12 | 15 |
4402 | Unincorporated - Acton | 1652 | 6961 | 23.7 | 2021-04-12 | 15 |
4401 | Los Angeles - Woodland Hills | 27387 | 57284 | 47.8 | 2021-04-12 | 15 |
4400 | Los Angeles - Winnetka | 15821 | 41471 | 38.1 | 2021-04-12 | 15 |
4399 | Los Angeles - Wilshire Center | 13370 | 40599 | 32.9 | 2021-04-12 | 15 |
4398 | Los Angeles - Wilmington | 11236 | 42449 | 26.5 | 2021-04-12 | 15 |
4397 | Los Angeles - Wholesale District | 12332 | 32528 | 37.9 | 2021-04-12 | 15 |
df["area"] = df["area"].str.replace("City of", "", regex=False)
df["vaccinations"] = df["vaccinations"].astype(float)
df["pop_2019"] = df["pop_2019"].astype(float)
df["vaccinations_pct"] = ((df["vaccinations"] / df["pop_2019"]) * 100).round(2)
df["week"] = pd.to_datetime(df["update_date"]).dt.week
df["weekday"] = pd.to_datetime(df["update_date"]).dt.day_name()
df = df[df["weekday"] == "Monday"]
df[df["area"].str.contains("Culver City")].sort_values(
"update_date", ascending=True
).head(100)
area | vaccinations | pop_2019 | vaccinations_pct | update_date | week | weekday | |
---|---|---|---|---|---|---|---|
14637 | Culver City | 7445.0 | 32840.0 | 22.67 | 2021-03-01 | 9 | Monday |
6981 | Culver City | 7445.0 | 32840.0 | 22.67 | 2021-03-08 | 10 | Monday |
15333 | Culver City | 9969.0 | 32840.0 | 30.36 | 2021-03-15 | 11 | Monday |
5937 | Culver City | 9969.0 | 32840.0 | 30.36 | 2021-03-22 | 12 | Monday |
14289 | Culver City | 13819.0 | 32840.0 | 42.08 | 2021-03-29 | 13 | Monday |
1761 | Culver City | 13819.0 | 32840.0 | 42.08 | 2021-04-05 | 14 | Monday |
4197 | Culver City | 17718.0 | 33638.0 | 52.67 | 2021-04-12 | 15 | Monday |
df_pivot = (
pd.pivot_table(
df,
values="vaccinations_pct",
index="area",
columns="week",
aggfunc="mean",
)
.round(2)
.reset_index()
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-3-c8942e05d481> in <module> 1 df_pivot = ( ----> 2 pd.pivot_table( 3 df, 4 values="vaccinations_pct", 5 index="area", NameError: name 'pd' is not defined
df_pivot.head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-4-a99db6e1a1af> in <module> ----> 1 df_pivot.head() NameError: name 'df_pivot' is not defined
df_pivot.columns = ["pct_week_" + str(col) for col in df_pivot.columns]
df_pivot.rename(
columns={"pct_week_area": "name", "week_pop_2019": "population"}, inplace=True
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-7cc4466fa477> in <module> ----> 1 df_pivot.rename( 2 columns={"pct_week_area": "name", "week_pop_2019": "population"}, inplace=True 3 ) NameError: name 'df_pivot' is not defined
df_pivot.head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-2-a99db6e1a1af> in <module> ----> 1 df_pivot.head() NameError: name 'df_pivot' is not defined
df_pivot["name"] = df_pivot["name"].str.strip(" ")
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")
disparity["name"] = disparity["name"].str.strip(" ")
disparity_clean = disparity[
[
"name",
"total_pop",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
"nonwhite_percentage",
"majority_white",
"majority_hispanic",
"median_householdincome",
]
]
df_merge = pd.merge(df_pivot, disparity_clean, on="name", indicator=True, how="left")
df_merge["change_week9_to_15"] = (
((df_merge["pct_week_15"] - df_merge["pct_week_9"]) / df_merge["pct_week_9"]) * 100
).round()
df_merge_slim = df_merge[
[
"name",
"pct_week_9",
"pct_week_15",
"total_pop",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
"nonwhite_percentage",
"majority_white",
"majority_hispanic",
"median_householdincome",
"change_week9_to_15",
]
].copy()
df_merge_slim["total_pop"] = df_merge_slim["total_pop"].fillna("").astype(int)
larger_places = df_merge_slim[df_merge_slim["total_pop"] > 5000].copy()
largest = (
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.head(10)
)
largest[
[
"name",
"pct_week_15",
"majority_hispanic",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
]
]
name | pct_week_15 | majority_hispanic | totalpop_17andunder_percent | totalpop_65andup_percent | |
---|---|---|---|---|---|
161 | Los Angeles - University Park | 25.15 | False | 0.119700 | 0.061099 |
273 | Unincorporated - Littlerock/Pearblossom | 21.74 | True | 0.284108 | 0.139551 |
155 | Los Angeles - Thai Town | 27.97 | False | 0.107368 | 0.130282 |
254 | Unincorporated - Florence-Firestone | 25.36 | True | 0.317636 | 0.068662 |
269 | Unincorporated - Lennox | 27.16 | True | 0.285876 | 0.064121 |
20 | Cudahy | 25.56 | True | 0.305616 | 0.069818 |
49 | Los Angeles - Alsace | 29.92 | True | 0.209612 | 0.089146 |
90 | Los Angeles - Green Meadows | 23.61 | True | 0.308907 | 0.074795 |
86 | Los Angeles - Florence-Firestone | 22.81 | True | 0.319442 | 0.055256 |
169 | Los Angeles - Vernon Central | 22.51 | True | 0.323692 | 0.062574 |
round(
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.head(10)["median_householdincome"]
.mean()
)
42400
round(
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.head(10)["totalpop_65andup_percent"]
.mean()
* 100,
2,
)
8.15
round(
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.tail(10)["median_householdincome"]
.mean()
)
141297
round(
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.tail(10)["totalpop_65andup_percent"]
.mean()
* 100,
2,
)
22.77
smallest = (
larger_places[
(larger_places["pct_week_9"].notnull())
& (larger_places["pct_week_15"].notnull())
]
.sort_values("change_week9_to_15", ascending=False)
.tail(10)
)
smallest[
[
"name",
"pct_week_15",
"majority_hispanic",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
]
]
name | pct_week_15 | majority_hispanic | totalpop_17andunder_percent | totalpop_65andup_percent | |
---|---|---|---|---|---|
200 | Rancho Palos Verdes | 50.23 | False | 0.213775 | 0.248590 |
180 | Los Angeles - Westwood | 35.76 | False | 0.088921 | 0.119817 |
203 | Rolling Hills Estates | 55.39 | False | 0.222468 | 0.247619 |
195 | Palos Verdes Estates | 51.26 | False | 0.213455 | 0.270924 |
81 | Los Angeles - Encino | 54.11 | False | 0.207176 | 0.199472 |
64 | Los Angeles - Century City | 59.68 | False | 0.177379 | 0.234012 |
67 | Los Angeles - Cheviot Hills | 63.66 | False | 0.211283 | 0.202283 |
55 | Los Angeles - Beverly Crest | 54.03 | False | 0.196094 | 0.237505 |
10 | Beverly Hills | 51.69 | False | 0.202520 | 0.213604 |
54 | Los Angeles - Bel Air | 52.68 | False | 0.209939 | 0.303493 |
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
][
[
"name",
"pct_week_15",
"majority_hispanic",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
]
].sort_values(
"pct_week_15", ascending=False
).head(
10
)
name | pct_week_15 | majority_hispanic | totalpop_17andunder_percent | totalpop_65andup_percent | |
---|---|---|---|---|---|
67 | Los Angeles - Cheviot Hills | 63.66 | False | 0.211283 | 0.202283 |
136 | Los Angeles - Rancho Park | 62.71 | False | 0.207814 | 0.163806 |
64 | Los Angeles - Century City | 59.68 | False | 0.177379 | 0.234012 |
126 | Los Angeles - Pacific Palisades | 56.17 | False | 0.248622 | 0.236204 |
134 | Los Angeles - Playa Vista | 55.74 | False | 0.186668 | 0.104989 |
203 | Rolling Hills Estates | 55.39 | False | 0.222468 | 0.247619 |
81 | Los Angeles - Encino | 54.11 | False | 0.207176 | 0.199472 |
55 | Los Angeles - Beverly Crest | 54.03 | False | 0.196094 | 0.237505 |
212 | Sierra Madre | 53.69 | False | 0.182263 | 0.228925 |
208 | San Marino | 53.29 | False | 0.227649 | 0.206610 |
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
][
[
"name",
"pct_week_15",
"majority_hispanic",
"totalpop_17andunder_percent",
"totalpop_65andup_percent",
]
].sort_values(
"pct_week_15", ascending=False
).tail(
10
)
name | pct_week_15 | majority_hispanic | totalpop_17andunder_percent | totalpop_65andup_percent | |
---|---|---|---|---|---|
179 | Los Angeles - Westlake | 22.88 | True | 0.230150 | 0.093303 |
86 | Los Angeles - Florence-Firestone | 22.81 | True | 0.319442 | 0.055256 |
169 | Los Angeles - Vernon Central | 22.51 | True | 0.323692 | 0.062574 |
146 | Los Angeles - South Park | 22.28 | True | 0.322496 | 0.056965 |
63 | Los Angeles - Central | 22.14 | True | 0.306173 | 0.065168 |
65 | Los Angeles - Century Palms/Cove | 21.88 | True | 0.311102 | 0.073659 |
273 | Unincorporated - Littlerock/Pearblossom | 21.74 | True | 0.284108 | 0.139551 |
172 | Los Angeles - Watts | 21.42 | True | 0.358358 | 0.055530 |
168 | Los Angeles - Vermont Vista | 20.70 | True | 0.319345 | 0.067000 |
266 | Unincorporated - Lake Los Angeles | 19.11 | True | 0.302845 | 0.114554 |
largest["category"] = "Largest increase"
smallest["category"] = "Smallest increase"
large_small = pd.concat([largest, smallest]).reset_index()
large_small["name_clean"] = (
large_small["name"]
.str.replace("City of", "", regex=False)
.str.replace("Los Angeles - ", "", regex=False)
.str.replace("Unincorporated - ", "", regex=False)
)
large_small_slim = large_small[
[
"category",
"name_clean",
"pct_week_9",
"pct_week_15",
"change_week9_to_15",
]
].copy()
large_small_slim.rename(
columns={
"change_week9_to_15": "% change",
"pct_week_15": "Week 15",
"pct_week_9": "Week 9",
"name_clean": "Place",
"category": "Category",
},
inplace=True,
)
places_geo = gpd.read_file(
"input/cities-neighborhoods-unincorporated-la-county-no-islands.geojson"
)
places_geo = places_geo.to_crs("EPSG:4326")
larger_places["name_clean"] = (
larger_places["name"]
.str.strip(" ")
.str.replace("City of", "", regex=False)
.str.replace("Los Angeles - ", "", regex=False)
.str.replace("Unincorporated - ", "", regex=False)
)
df_geo = places_geo.merge(larger_places, left_on="NAME", right_on="name_clean")
df_geo_slim = df_geo[
["name_clean", "pct_week_9", "pct_week_15", "change_week9_to_15", "geometry"]
]
df_geo_slim[
["name_clean", "pct_week_9", "pct_week_15", "change_week9_to_15", "geometry"]
].to_file("output/vaccine-community/df_geo.geojson", driver="GeoJSON")
df_geo_slim[["name_clean", "change_week9_to_15"]].to_csv(
"output/vaccine-community/df_geo_metadata_datawrapper.csv", index=False
)
!mapshaper -i output/vaccine-community/df_geo.geojson -simplify percentage=.3 no-repair -o output/vaccine-community/df_geo_simple.geojson \
[o] Wrote output/vaccine-community/df_geo_simple.geojson
import datetime as dt
today = dt.datetime.today().strftime("%m-%d-%Y")
concatenated_df.to_csv(
"output/vaccine-community/lac_vax_by_place_raw_table_" + today + ".csv",
index=False,
)
df_pivot.to_csv(
"output/vaccine-community/lac_vax_by_place_weeks_" + today + ".csv",
index=False,
)
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
].sort_values("change_week9_to_15", ascending=False).head(10).to_csv(
"output/vaccine-community/largest_vax_coverage_change_" + today + ".csv",
index=False,
)
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
].sort_values("change_week9_to_15", ascending=False).tail(10).to_csv(
"output/vaccine-community/smallest_vax_coverage_change_" + today + ".csv",
index=False,
)
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
][["name", "pct_week_15", "total_pop"]].sort_values(
"pct_week_15", ascending=False
).tail(
10
).to_csv(
"output/vaccine-community/lowest_coverage_" + today + ".csv",
index=False,
)
larger_places[
(larger_places["pct_week_9"].notnull()) & (larger_places["pct_week_15"].notnull())
][["name", "pct_week_15", "total_pop"]].sort_values(
"pct_week_15", ascending=False
).head(
10
).to_csv(
"output/vaccine-community/highest_coverage_" + today + ".csv",
index=False,
)
large_small_slim.to_csv("output/vaccine-community/large_small_slim.csv", index=False)