Group our building points into grids that segment the state.
import os
import glob
import geopandas as gpd
%store -r
# Load the two hexagonal grid layers (per the filenames, presumably ~0.75- and
# ~0.5-degree cells — TODO confirm units) and the state border polygon.
# NOTE(review): `input_dir` is assumed to be restored by the `%store -r` magic
# above — verify it is defined before running this cell.
hexes_big = gpd.read_file(os.path.join(input_dir, "grids", "grid-x075.shp"))
hexes_small = gpd.read_file(os.path.join(input_dir, "grids", "grid-x05.shp"))
cali = gpd.read_file(os.path.join(input_dir, "state", "state.shp"))
# Pad the border slightly so grid cells that just touch the edge still pass
# the `within` test used by cali_filter below.
cali['geometry'] = cali.buffer(0.1)
def cali_filter(df):
    """
    Clips the provided geodataframe at the California border.

    Returns the subset of rows whose geometry falls entirely within the
    (buffered) state polygon loaded above into the module-level `cali`.
    """
    # `.ix` is deprecated (it emitted DeprecationWarnings when this cell ran);
    # `.iloc[0]` is the modern positional way to grab the single state row.
    return df[df.within(cali.iloc[0].geometry)]
hexes_big_filtered = cali_filter(hexes_big)
hexes_small_filtered = cali_filter(hexes_small)
def set_id(df):
    """
    Create a unique identifier for each grid.

    Promotes the frame's integer index to a column named `hex_id`.
    """
    relabeled = df.reset_index()
    return relabeled.rename(columns={"index": "hex_id"})
# Stamp each filtered grid with a stable `hex_id` column for later joins.
hexes_big_tagged = set_id(hexes_big_filtered)
hexes_small_tagged = set_id(hexes_small_filtered)
def trim(df):
    """
    Trims down the columns.

    Keeps only the grid identifier and its geometry.
    """
    keep = ['hex_id', 'geometry']
    return df[keep]
# Reduce each grid to id + geometry, then persist both grids as shapefiles.
# NOTE(review): `output_dir` is assumed to be restored by `%store -r` — confirm.
hexes_big_trimmed = trim(hexes_big_tagged)
hexes_small_trimmed = trim(hexes_small_tagged)
hexes_big_trimmed.to_file(os.path.join(output_dir, 'hexes-big.shp'))
hexes_small_trimmed.to_file(os.path.join(output_dir, 'hexes-small.shp'))
def sjoin_batch(points_path, verbose=False):
    """
    Reads in a batch of buildings and returns them with a column that indicates if they fall within a grid

    Spatially joins one batch shapefile of building points against the big and
    small hex grids and writes a FID -> hex_id mapping CSV for each grid size.
    Batches whose output CSVs already exist are skipped, so the whole run is
    resumable.

    Args:
        points_path: path to a "...-batch-N.shp" building-points shapefile.
        verbose: when True, print progress and match-rate information.
    """
    number = points_path.split("-batch-")[1].replace(".shp", "")
    outpath_big = f"{output_dir}/california_big_hex_points_batch_{number}.csv"
    outpath_small = f"{output_dir}/california_small_hex_points_batch_{number}.csv"
    # Resumability: don't redo batches that already produced both outputs.
    if os.path.exists(outpath_big) and os.path.exists(outpath_small):
        if verbose:
            print(f"Skipping {number}")
        return
    # Read in the file
    try:
        print(f"Opening {points_path}")
        points = gpd.read_file(points_path)
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit
        # and makes the long batch loop impossible to stop with Ctrl-C.
        # Unreadable batches are skipped by design (best effort) — they produce
        # no CSVs and will be retried on a rerun.
        return
    # Run a spatial join against the hex grids
    sjoin_big = gpd.sjoin(points, hexes_big_trimmed, how="left", op="intersects")
    sjoin_small = gpd.sjoin(points, hexes_small_trimmed, how="left", op="intersects")
    if verbose:
        # A null hex_id means the left join found no containing hex.
        print(f"{len(sjoin_big[~gpd.pd.isnull(sjoin_big.hex_id)])}/{len(sjoin_big)} from {points_path} in a big hex")
        print(f"{len(sjoin_small[~gpd.pd.isnull(sjoin_small.hex_id)])}/{len(sjoin_small)} from {points_path} in a small hex")
    if verbose:
        print(f"Writing {outpath_big}")
    sjoin_big[["FID", "hex_id"]].to_csv(outpath_big, index=False)
    if verbose:
        print(f"Writing {outpath_small}")
    sjoin_small[["FID", "hex_id"]].to_csv(outpath_small, index=False)
# Collect the building-point batch shapefiles, ordered numerically by batch
# number (a plain lexicographic sort would put batch-10 before batch-2).
path_list = sorted(
    glob.glob(os.path.join(output_dir, "california-building-points-batch-*.shp")),
    key=lambda x: int(x.split("-batch-")[1].replace(".shp", ""))
)
# Notebook expression: report how many batches were found.
len(path_list)
1099
# Smoke-test the join on the first batch with progress printing enabled.
sjoin_batch(path_list[0], verbose=True)
Skipping 1
# Join every batch; batches with existing outputs are skipped inside sjoin_batch.
for batch_path in path_list:
    sjoin_batch(batch_path)
def merge(pattern):
    """
    Merge a set of CSVs.

    Concatenates every CSV in the module-level `output_dir` whose filename
    matches `pattern` into a single DataFrame.
    """
    # Sort the matches so concatenation (and therefore output row) order is
    # deterministic — glob.glob returns files in arbitrary filesystem order.
    csv_list = sorted(glob.glob(os.path.join(output_dir, pattern)))
    # NOTE(review): pd.concat raises ValueError if no files match — that is a
    # useful loud failure here, so it is left unguarded.
    return gpd.pd.concat([gpd.pd.read_csv(f) for f in csv_list])
# Combine the per-batch join results for each grid size.
merged_big = merge('california_big_hex_points_batch_*.csv')
merged_small = merge('california_small_hex_points_batch_*.csv')
# Notebook sanity check: both joins should cover the same number of points.
len(merged_big), len(merged_small)
(10988525, 10988525)
# Persist the final building-point -> hex lookup tables.
merged_big.to_csv(os.path.join(output_dir, "california-building-points-to-big-hexes.csv"), index=False)
merged_small.to_csv(os.path.join(output_dir, "california-building-points-to-small-hexes.csv"), index=False)