Vectorized Containment in Shapely (cont'd)

In [1]:
import numpy as np
import fiona

from matplotlib import pyplot
import matplotlib.colors as mcolors
from descartes import PolygonPatch

from shapely.geometry import MultiPolygon, Point
from shapely.geometry import shape
from shapely.vectorized import contains

Test dataset, State of Florida (United States)

In [2]:
florida = [shape(f['geometry']) for f in fiona.open('shapefiles/florida.shp')][0]

x_i, y_i, x_j, y_j = florida.bounds

print 'Boundaries for Florida\n'
print 'x\t\ty\t'
print '--\t\t--\t'
print '{}\t{}\n{}\t{}'.format(*florida.bounds)
Boundaries for Florida

x		y	
--		--	
-87.634917896	24.523213195
-80.031439446	31.000962206
In [3]:
xv, yv = np.linspace(x_i, x_j, num=100), np.linspace(y_i, y_j, num=100)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)
In [4]:
florida_mask = contains(florida, x, y)

Visualized

In [5]:
fig = pyplot.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
ax.set_axis_off()
ax.set_aspect('equal')
cmap = mcolors.ListedColormap(['#008B8B', '#FF69B4'])

for p in florida:
    ax.add_patch(PolygonPatch(p, fc='#DCDCDC', ec='#000000', zorder=-1))

pyplot.scatter(x, y, s=(florida_mask + 0.5) * 15., c=florida_mask + 1, edgecolor='none', cmap=cmap)

pyplot.plot()
Out[5]:
[]

Benchmarks

In [6]:
import shapely.prepared

def pure_python_contains(geom, x, y):
    """Pure Python ```contains``` implementation.
    
    Borrowed from http://nbviewer.ipython.org/gist/pelson/9785576
    """
    prep = shapely.prepared.prep(geom)
    result = np.empty(x.shape, dtype=np.bool)
    for index in np.ndindex(result.shape):
        result[index] = prep.contains(Point(x[index], y[index]))

With 10k points...

In [7]:
xv, yv = np.linspace(x_i, x_j, num=100), np.linspace(y_i, y_j, num=100)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)

%timeit contains(florida, x, y)
%timeit pure_python_contains(florida, x, y)
10 loops, best of 3: 37.3 ms per loop
1 loops, best of 3: 348 ms per loop

With 1 million points...

In [8]:
xv, yv = np.linspace(x_i, x_j, num=1000), np.linspace(y_i, y_j, num=1000)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)

%timeit contains(florida, x, y)
%timeit pure_python_contains(florida, x, y)
1 loops, best of 3: 1.24 s per loop
1 loops, best of 3: 32.6 s per loop