Group our building points into grids that segment the state.
import os
import glob
import geopandas as gpd
%store -r
# Load the two hexagonal grid layers (per the filenames, presumably ~0.75- and
# ~0.5-degree cells — TODO confirm units) and the state border polygon.
# NOTE(review): `input_dir` is assumed to be restored by the `%store -r` magic
# above — verify it is defined before running this cell.
hexes_big = gpd.read_file(os.path.join(input_dir, "grids", "grid-x075.shp"))
hexes_small = gpd.read_file(os.path.join(input_dir, "grids", "grid-x05.shp"))
cali = gpd.read_file(os.path.join(input_dir, "state", "state.shp"))
# Pad the border slightly so grid cells that just touch the edge still pass
# the `within` test used by cali_filter below.
cali['geometry'] = cali.buffer(0.1)
def cali_filter(df):
    """
    Clips the provided geodataframe at the California border.

    Returns the subset of rows whose geometry falls entirely within the
    (buffered) state polygon loaded above into the module-level `cali`.
    """
    # `.ix` is deprecated (it emitted DeprecationWarnings when this cell ran);
    # `.iloc[0]` is the modern positional way to grab the single state row.
    return df[df.within(cali.iloc[0].geometry)]
hexes_big_filtered = cali_filter(hexes_big)
hexes_small_filtered = cali_filter(hexes_small)
def set_id(df):
    """
    Create a unique identifier for each grid.

    Promotes the frame's integer index to a column named `hex_id`.
    """
    relabeled = df.reset_index()
    return relabeled.rename(columns={"index": "hex_id"})
# Stamp each filtered grid with a stable `hex_id` column for later joins.
hexes_big_tagged = set_id(hexes_big_filtered)
hexes_small_tagged = set_id(hexes_small_filtered)
def trim(df):
    """
    Trims down the columns.

    Keeps only the grid identifier and its geometry.
    """
    keep = ['hex_id', 'geometry']
    return df[keep]
# Reduce each grid to id + geometry, then persist both grids as shapefiles.
# NOTE(review): `output_dir` is assumed to be restored by `%store -r` — confirm.
hexes_big_trimmed = trim(hexes_big_tagged)
hexes_small_trimmed = trim(hexes_small_tagged)
hexes_big_trimmed.to_file(os.path.join(output_dir, 'hexes-big.shp'))
hexes_small_trimmed.to_file(os.path.join(output_dir, 'hexes-small.shp'))
def sjoin_batch(points_path, verbose=False):
    """
    Reads in a batch of buildings and returns them with a column that indicates if they fall within a grid

    Spatially joins one batch shapefile of building points against the big and
    small hex grids and writes a FID -> hex_id mapping CSV for each grid size.
    Batches whose output CSVs already exist are skipped, so the whole run is
    resumable.

    Args:
        points_path: path to a "...-batch-N.shp" building-points shapefile.
        verbose: when True, print progress and match-rate information.
    """
    number = points_path.split("-batch-")[1].replace(".shp", "")
    outpath_big = f"{output_dir}/california_big_hex_points_batch_{number}.csv"
    outpath_small = f"{output_dir}/california_small_hex_points_batch_{number}.csv"
    # Resumability: don't redo batches that already produced both outputs.
    if os.path.exists(outpath_big) and os.path.exists(outpath_small):
        if verbose:
            print(f"Skipping {number}")
        return
    # Read in the file
    try:
        print(f"Opening {points_path}")
        points = gpd.read_file(points_path)
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit
        # and makes the long batch loop impossible to stop with Ctrl-C.
        # Unreadable batches are skipped by design (best effort) — they produce
        # no CSVs and will be retried on a rerun.
        return
    # Run a spatial join against the hex grids
    sjoin_big = gpd.sjoin(points, hexes_big_trimmed, how="left", op="intersects")
    sjoin_small = gpd.sjoin(points, hexes_small_trimmed, how="left", op="intersects")
    if verbose:
        # A null hex_id means the left join found no containing hex.
        print(f"{len(sjoin_big[~gpd.pd.isnull(sjoin_big.hex_id)])}/{len(sjoin_big)} from {points_path} in a big hex")
        print(f"{len(sjoin_small[~gpd.pd.isnull(sjoin_small.hex_id)])}/{len(sjoin_small)} from {points_path} in a small hex")
    if verbose:
        print(f"Writing {outpath_big}")
    sjoin_big[["FID", "hex_id"]].to_csv(outpath_big, index=False)
    if verbose:
        print(f"Writing {outpath_small}")
    sjoin_small[["FID", "hex_id"]].to_csv(outpath_small, index=False)
# Collect the building-point batch shapefiles, ordered numerically by batch
# number (a plain lexicographic sort would put batch-10 before batch-2).
path_list = sorted(
    glob.glob(os.path.join(output_dir, "california-building-points-batch-*.shp")),
    key=lambda x: int(x.split("-batch-")[1].replace(".shp", ""))
)
# Notebook expression: report how many batches were found.
len(path_list)
1099
# Smoke-test the join on the first batch with progress printing enabled.
sjoin_batch(path_list[0], verbose=True)
Skipping 1
# Join every batch; batches with existing outputs are skipped inside sjoin_batch.
for batch_path in path_list:
    sjoin_batch(batch_path)
def merge(pattern):
    """
    Merge a set of CSVs.

    Concatenates every CSV in the module-level `output_dir` whose filename
    matches `pattern` into a single DataFrame.
    """
    # Sort the matches so concatenation (and therefore output row) order is
    # deterministic — glob.glob returns files in arbitrary filesystem order.
    csv_list = sorted(glob.glob(os.path.join(output_dir, pattern)))
    # NOTE(review): pd.concat raises ValueError if no files match — that is a
    # useful loud failure here, so it is left unguarded.
    return gpd.pd.concat([gpd.pd.read_csv(f) for f in csv_list])
# Combine the per-batch join results for each grid size.
merged_big = merge('california_big_hex_points_batch_*.csv')
merged_small = merge('california_small_hex_points_batch_*.csv')
# Notebook sanity check: both joins should cover the same number of points.
len(merged_big), len(merged_small)
(10988525, 10988525)
# Persist the final building-point -> hex lookup tables.
merged_big.to_csv(os.path.join(output_dir, "california-building-points-to-big-hexes.csv"), index=False)
merged_small.to_csv(os.path.join(output_dir, "california-building-points-to-small-hexes.csv"), index=False)