Vectorized Containment in Shapely (cont'd)¶

In [1]:
import numpy as np
import fiona

from matplotlib import pyplot
import matplotlib.colors as mcolors
from descartes import PolygonPatch

from shapely.geometry import MultiPolygon, Point
from shapely.geometry import shape
from shapely.vectorized import contains


Test dataset, State of Florida (United States)¶

In [2]:
florida = [shape(f['geometry']) for f in fiona.open('shapefiles/florida.shp')][0]

x_i, y_i, x_j, y_j = florida.bounds

print 'Boundaries for Florida\n'
print 'x\t\ty\t'
print '--\t\t--\t'
print '{}\t{}\n{}\t{}'.format(*florida.bounds)

Boundaries for Florida

x		y
--		--
-87.634917896	24.523213195
-80.031439446	31.000962206

In [3]:
xv, yv = np.linspace(x_i, x_j, num=100), np.linspace(y_i, y_j, num=100)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)

In [4]:
florida_mask = contains(florida, x, y)


Visualized¶

In [5]:
fig = pyplot.figure(figsize=(12, 12))
ax.set_axis_off()
ax.set_aspect('equal')
cmap = mcolors.ListedColormap(['#008B8B', '#FF69B4'])

for p in florida:

pyplot.scatter(x, y, s=(florida_mask + 0.5) * 15., c=florida_mask + 1, edgecolor='none', cmap=cmap)

pyplot.plot()

Out[5]:
[]

Benchmarks¶

In [6]:
import shapely.prepared

def pure_python_contains(geom, x, y):
"""Pure Python contains implementation.

Borrowed from http://nbviewer.ipython.org/gist/pelson/9785576
"""
prep = shapely.prepared.prep(geom)
result = np.empty(x.shape, dtype=np.bool)
for index in np.ndindex(result.shape):
result[index] = prep.contains(Point(x[index], y[index]))


With 10k points...¶

In [7]:
xv, yv = np.linspace(x_i, x_j, num=100), np.linspace(y_i, y_j, num=100)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)

%timeit contains(florida, x, y)
%timeit pure_python_contains(florida, x, y)

10 loops, best of 3: 37.3 ms per loop
1 loops, best of 3: 348 ms per loop


With 1 million points...¶

In [8]:
xv, yv = np.linspace(x_i, x_j, num=1000), np.linspace(y_i, y_j, num=1000)
xm, ym = np.meshgrid(xv, yv)
x, y = np.concatenate(xm), np.concatenate(ym)

%timeit contains(florida, x, y)
%timeit pure_python_contains(florida, x, y)

1 loops, best of 3: 1.24 s per loop
1 loops, best of 3: 32.6 s per loop