#!/usr/bin/env python # coding: utf-8 # # Beat Populations # # Link census tract populations, total and by race, into police beats. Attributes population from tracts to beats by the areas of the overlaps. The basic procedure is to find the overlaps between beats and Census tracts, then addign a portion of the population of the tract to the beat, based on the raio of the size of overlap to the size of the tract. # In[1]: import seaborn as sns import metapack as mp import pandas as pd import geopandas as gpd import numpy as np import matplotlib.pyplot as plt from IPython.display import display import utm get_ipython().run_line_magic('matplotlib', 'inline') sns.set_context('notebook') # In[2]: #pkg = mp.jupyter.open_package() pkg = mp.jupyter.open_source_package() pkg # In[3]: # Convert to EPSG:26911, ( A randomly selected UTM Zone 11N CRS) so area calculations # will be in square meters, rather than square degrees beats = pkg.resource('pd_beats').geoframe() beats.head() # In[4]: # Figure out the UTM zone, and from that the CRS EPSG for the UTM zone xmin, ymin, xmax, ymax = beats.to_crs(4326).envelope.total_bounds band = utm.from_latlon((ymax-ymin)/2+ymin, (xmax-xmin)/2+xmin )[2] epsg = int('326'+str(band)) assert(epsg == 32611) # It's always San Diego, so we already know what it is ... # In[5]: # Convert to a UTM EPSG, so that we can do calculations in meters. beats = beats.to_crs(epsg) # There are beats that are way off in east county. Get rid of them. #rightmost_centroid = beats.centroid.x.sort_values(ascending=False).iloc[:6].max() #beats = beats[beats.centroid.x = .01] # Merge back with beats and tracts to get beat and tract areas. merged = intr[['beat','geoid', 'intr_area']]\ .merge(tracts[['geoid', 'tract_area']],on='geoid')\ .merge(beats[['beat', 'beat_area']],on='beat')\ .merge(race_tracts, on='geoid') merged = merged.drop_duplicates(subset=['beat','geoid']) merged['tract_overlap_proportion'] = merged.intr_area/merged.tract_area merged['beat_overlap_proportion'] = merged.intr_area/merged.beat_area # The intersection areas must be smaller than both of the areas being intersected assert(not any(merged.intr_area > merged.beat_area)) assert(not any(merged.intr_area > merged.tract_area)) # Check that all of the areas of the beats are accounted for assert(all(merged.groupby('beat').beat_overlap_proportion.sum().round(1) == 1)) # Multiply by the overlap portion to get the final values. merged.loc[:,'total':'hisp'] = merged.loc[:,'total':'hisp'].multiply(merged.tract_overlap_proportion, axis=0) merged.head().T # In[11]: beat_demographics = merged.groupby('beat').sum()[['total', 'white', 'black', 'aian', 'asian', 'nhopi', 'hisp']].round() # In[14]: # Sanity checks. SD population is about 1.3M assert beat_demographics.sum().total > 1_300_000 # In[24]: rel_err = np.abs(beat_demographics.loc[:,'white':].sum().sum() - beat_demographics.total.sum()) / beat_demographics.total.sum() assert rel_err < 0.05 # In[33]: merged.loc[:,'total':'hisp'].multiply(merged.tract_overlap_proportion, axis=0) # In[31]: beat_demographics # In[34]: merged # In[ ]: