Link census tract populations, total and by race, into police beats. Attributes population from tracts to beats by the areas of the overlaps. The basic procedure is to find the overlaps between beats and Census tracts, then addign a portion of the population of the tract to the beat, based on the raio of the size of overlap to the size of the tract.
import seaborn as sns
import metapack as mp
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import utm
%matplotlib inline
sns.set_context('notebook')
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
pkg
sandiego.gov-police_regions-2.1.2
Last Update: 2021-02-25T19:07:59
Boundary shapes for San Diego neighborhoods, beats and divisions, with ACS 2019 estimates for populations, by race.
This package links shapefiles for San Diego police beats to Census tracts and merges in ACS estimates for population, by race, from the 2016 5 year ACS. When a police beat boundry crosses a tract, the tract population is allocated to beats by the proportion of the overlap by area. See the Jupyter notebook that performs the procedure for details.
For the race/ethicty statistics, Hispanic ('hisp') refers to Hispanics of any race, while all other races refer to non-Hispanics of that race.
# Convert to EPSG:26911, ( A randomly selected UTM Zone 11N CRS) so area calculations
# will be in square meters, rather than square degrees
beats = pkg.resource('pd_beats').geoframe()
beats.head()
objectid | beat | div | serv | name | geometry | |
---|---|---|---|---|---|---|
0 | 3 | 935 | 9 | 930 | NORTH CITY | MULTIPOLYGON (((-117.20425 32.96202, -117.2043... |
1 | 7 | 0 | 0 | 0 | SAN DIEGO | MULTIPOLYGON (((-117.22526 32.70267, -117.2264... |
2 | 8 | 511 | 5 | 510 | MULTIPOLYGON (((-117.22529 32.70261, -117.2246... | |
3 | 9 | 722 | 7 | 720 | NESTOR | POLYGON ((-117.09042 32.58383, -117.08714 32.5... |
4 | 10 | 314 | 3 | 310 | BIRDLAND | POLYGON ((-117.15149 32.80650, -117.14943 32.7... |
# Figure out the UTM zone, and from that the CRS EPSG for the UTM zone
xmin, ymin, xmax, ymax = beats.to_crs(4326).envelope.total_bounds
band = utm.from_latlon((ymax-ymin)/2+ymin, (xmax-xmin)/2+xmin )[2]
epsg = int('326'+str(band))
assert(epsg == 32611) # It's always San Diego, so we already know what it is ...
# Convert to a UTM EPSG, so that we can do calculations in meters.
beats = beats.to_crs(epsg)
# There are beats that are way off in east county. Get rid of them.
#rightmost_centroid = beats.centroid.x.sort_values(ascending=False).iloc[:6].max()
#beats = beats[beats.centroid.x <rightmost_centroid]
# It looks like the dataset has multiple rows per beat, one feature per row. We need
# it to have one row per beat, with multiple features combined together.
beats = beats.dissolve(by='beat').reset_index()
# Add the area
beats['beat_area'] = beats.area / 1_000_000
beats.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd8f861d650>
tracts = pkg.reference('tracts').geoframe().to_crs(epsg)
# Add the area
tracts['tract_area'] = tracts.area / 1_000_000
tracts.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd8f8803910>
# White, black, asian, etc are all non hispanic.
col_map = {
'b03002_001':'total',
'b03002_003':'white',
'b03002_004':'black',
'b03002_005':'aian',
'b03002_006':'asian',
'b03002_007':'nhopi',
'b03002_012':'hisp'
}
for k,v in list(col_map.items()):
col_map[k+'_m90'] = col_map[k]+'_m90'
t = pkg.reference('race').dataframe()
race_tracts = t[t.county=='073'].rename(columns=col_map).reset_index().rename(columns={'GEOID':'geoid'})
race_tracts = race_tracts[['geoid', 'total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']]
race_tracts.titles.head().T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
GEOID | 14000US06073000100 | 14000US06073000201 | 14000US06073000202 | 14000US06073000300 | 14000US06073000400 |
total | 3093 | 1891 | 4542 | 5239 | 3801 |
white | 2389 | 1569 | 3390 | 3820 | 2148 |
black | 0 | 10 | 4 | 266 | 228 |
aian | 0 | 11 | 0 | 0 | 0 |
asian | 112 | 75 | 379 | 146 | 430 |
nhopi | 0 | 0 | 3 | 7 | 0 |
hisp | 489 | 140 | 616 | 871 | 884 |
t = gpd.sjoin(beats, tracts)
ax = t.plot()
beats.centroid.plot(ax=ax, color='red')
t = t[['geoid', 'beat']].drop_duplicates()\
.merge(tracts[['geoid','geometry', 'tract_area']],on='geoid')\
.merge(beats[['beat','geometry', 'beat_area']],on='beat')
# merge by finding intersections. Each record in intr will be an area that is entirely
# in both one tract and one beat.
intr = gpd.overlay(beats, tracts, how='intersection')[['beat','geoid','geometry']]
print(len(beats), len(tracts), len(intr))
intr['intr_area'] = (intr.geometry.area/1_000_000.0).astype(float)
# Get rid of really small intersections
intr = intr[intr.intr_area >= .01]
# Merge back with beats and tracts to get beat and tract areas.
merged = intr[['beat','geoid', 'intr_area']]\
.merge(tracts[['geoid', 'tract_area']],on='geoid')\
.merge(beats[['beat', 'beat_area']],on='beat')\
.merge(race_tracts, on='geoid')
merged = merged.drop_duplicates(subset=['beat','geoid'])
merged['tract_overlap_proportion'] = merged.intr_area/merged.tract_area
merged['beat_overlap_proportion'] = merged.intr_area/merged.beat_area
# The intersection areas must be smaller than both of the areas being intersected
assert(not any(merged.intr_area > merged.beat_area))
assert(not any(merged.intr_area > merged.tract_area))
# Check that all of the areas of the beats are accounted for
assert(all(merged.groupby('beat').beat_overlap_proportion.sum().round(1) == 1))
# Multiply by the overlap portion to get the final values.
merged.loc[:,'total':'hisp'] = merged.loc[:,'total':'hisp'].multiply(merged.tract_overlap_proportion, axis=0)
merged.head().T
127 8057 1011
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
beat | 0 | 511 | 0 | 511 | 0 |
geoid | 14000US06073003800 | 14000US06073003800 | 14000US06073011300 | 14000US06073011300 | 14000US06073009902 |
intr_area | 0.0367429 | 1.77045 | 0.0511633 | 1.91826 | 17.1586 |
tract_area | 1.81432 | 1.81432 | 10.7308 | 10.7308 | 20.1302 |
beat_area | 18.2217 | 6.79885 | 18.2217 | 6.79885 | 18.2217 |
total | 129.043 | 6217.91 | 11.7624 | 441.007 | 0 |
white | 59.6613 | 2874.76 | 5.74055 | 215.23 | 0 |
black | 25.3348 | 1220.75 | 1.99775 | 74.9015 | 0 |
aian | 1.2151 | 58.5491 | 0.0572148 | 2.14515 | 0 |
asian | 11.2599 | 542.555 | 0.505397 | 18.9488 | 0 |
nhopi | 0.931574 | 44.8876 | 0.0333753 | 1.25134 | 0 |
hisp | 27.3599 | 1318.33 | 3.28508 | 123.167 | 0 |
tract_overlap_proportion | 0.0202516 | 0.975818 | 0.0047679 | 0.178763 | 0.85238 |
beat_overlap_proportion | 0.00201643 | 0.260404 | 0.00280781 | 0.282145 | 0.941655 |
beat_demographics = merged.groupby('beat').sum()[['total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']].round()
# Sanity checks. SD population is about 1.3M
assert beat_demographics.sum().total > 1_300_000
rel_err = np.abs(beat_demographics.loc[:,'white':].sum().sum() - beat_demographics.total.sum()) / beat_demographics.total.sum()
assert rel_err < 0.05
merged.loc[:,'total':'hisp'].multiply(merged.tract_overlap_proportion, axis=0)
total | white | black | aian | asian | nhopi | hisp | |
---|---|---|---|---|---|---|---|
0 | 2.613335 | 1.208237 | 5.130699e-01 | 2.460767e-02 | 2.280311e-01 | 1.886588e-02 | 0.554083 |
1 | 6067.555950 | 2805.244794 | 1.191229e+03 | 5.713330e+01 | 5.294352e+02 | 4.380219e+01 | 1286.451364 |
2 | 0.056082 | 0.027370 | 9.525060e-03 | 2.727941e-04 | 2.409681e-03 | 1.591299e-04 | 0.015663 |
3 | 78.835542 | 38.475068 | 1.338958e+01 | 3.834724e-01 | 3.387340e+00 | 2.236923e-01 | 22.017709 |
4 | 0.000000 | 0.000000 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000 |
... | ... | ... | ... | ... | ... | ... | ... |
551 | 5564.298683 | 358.122985 | 9.225883e+01 | 0.000000e+00 | 1.332297e+03 | 0.000000e+00 | 3561.389244 |
552 | 0.000113 | 0.000047 | 1.249278e-05 | 6.104937e-07 | 6.060266e-06 | 2.799337e-06 | 0.000043 |
553 | 0.000020 | 0.000010 | 3.396700e-07 | 9.520676e-07 | 9.917371e-09 | 1.239671e-08 | 0.000009 |
554 | 3.493589 | 0.466302 | 1.833630e-01 | 3.595353e-03 | 7.612770e-01 | 9.066543e-03 | 1.683407 |
555 | 7150.583811 | 5565.303536 | 1.306223e+02 | 0.000000e+00 | 7.748280e+02 | 1.880170e+01 | 340.409747 |
556 rows × 7 columns
beat_demographics
total | white | black | aian | asian | nhopi | hisp | |
---|---|---|---|---|---|---|---|
beat | |||||||
0 | 542.0 | 264.0 | 62.0 | 2.0 | 34.0 | 3.0 | 161.0 |
111 | 27069.0 | 13222.0 | 1530.0 | 0.0 | 5210.0 | 191.0 | 6035.0 |
112 | 12344.0 | 8237.0 | 282.0 | 2.0 | 1245.0 | 22.0 | 2098.0 |
113 | 12721.0 | 8210.0 | 1.0 | 0.0 | 784.0 | 0.0 | 3108.0 |
114 | 16373.0 | 8843.0 | 339.0 | 3.0 | 2183.0 | 0.0 | 4318.0 |
... | ... | ... | ... | ... | ... | ... | ... |
933 | 7391.0 | 5722.0 | 136.0 | 0.0 | 821.0 | 19.0 | 360.0 |
934 | 45788.0 | 25358.0 | 428.0 | 177.0 | 13754.0 | 12.0 | 4013.0 |
935 | 9513.0 | 5535.0 | 25.0 | 6.0 | 2731.0 | 16.0 | 706.0 |
936 | 7762.0 | 3479.0 | 102.0 | 63.0 | 2777.0 | 30.0 | 808.0 |
937 | 9451.0 | 5558.0 | 113.0 | 49.0 | 2568.0 | 0.0 | 763.0 |
127 rows × 7 columns
merged
beat | geoid | intr_area | tract_area | beat_area | total | white | black | aian | asian | nhopi | hisp | tract_overlap_proportion | beat_overlap_proportion | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 14000US06073003800 | 0.036743 | 1.814322 | 18.221746 | 129.043281 | 59.661253 | 25.334768 | 1.215097 | 11.259897 | 0.931574 | 27.359930 | 0.020252 | 0.002016 |
1 | 511 | 14000US06073003800 | 1.770449 | 1.814322 | 6.798852 | 6217.914965 | 2874.761062 | 1220.748842 | 58.549105 | 542.555041 | 44.887647 | 1318.330684 | 0.975818 | 0.260404 |
2 | 0 | 14000US06073011300 | 0.051163 | 10.730782 | 18.221746 | 11.762402 | 5.740548 | 1.997749 | 0.057215 | 0.505397 | 0.033375 | 3.285081 | 0.004768 | 0.002808 |
3 | 511 | 14000US06073011300 | 1.918262 | 10.730782 | 6.798852 | 441.007122 | 215.230067 | 74.901493 | 2.145150 | 18.948827 | 1.251338 | 123.167372 | 0.178763 | 0.282145 |
4 | 0 | 14000US06073009902 | 17.158599 | 20.130225 | 18.221746 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.852380 | 0.941655 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
551 | 833 | 14000US06073002601 | 0.831085 | 0.834416 | 0.833758 | 5586.604632 | 359.558615 | 92.628674 | 0.000000 | 1337.637729 | 0.000000 | 3575.666006 | 0.996007 | 0.996793 |
552 | 9 | 14000US06073021302 | 0.046602 | 381.905937 | 0.333563 | 0.929342 | 0.388039 | 0.102379 | 0.005003 | 0.049664 | 0.022941 | 0.356069 | 0.000122 | 0.139710 |
553 | 9 | 14000US06073021100 | 0.058278 | 1170.397788 | 0.333563 | 0.398244 | 0.192798 | 0.006822 | 0.019121 | 0.000199 | 0.000249 | 0.170989 | 0.000050 | 0.174713 |
554 | 9 | 14000US06073013314 | 0.219781 | 17.578573 | 0.333563 | 279.424806 | 37.295816 | 14.665770 | 0.287564 | 60.888577 | 0.725162 | 134.642523 | 0.012503 | 0.658890 |
555 | 933 | 14000US06073008324 | 5.483780 | 5.512622 | 5.609425 | 7188.193001 | 5594.574790 | 131.309366 | 0.000000 | 778.903283 | 18.900591 | 342.200165 | 0.994768 | 0.977601 |
556 rows × 14 columns