import geopandas
import pygeos
# Quick copy of the relevant part of https://github.com/pyviz/datashader/pull/819
import numpy as np
def from_geopandas(ga):
line_parts = [
_shapely_to_array_parts(shape) for shape in ga
]
line_lengths = [
sum([len(part) for part in parts])
for parts in line_parts
]
flat_array = np.concatenate(
[part for parts in line_parts for part in parts]
)
start_indices = np.concatenate(
[[0], line_lengths[:-1]]
).astype('uint').cumsum()
return start_indices, flat_array
def _polygon_to_array_parts(polygon):
import shapely.geometry as sg
shape = sg.polygon.orient(polygon)
exterior = np.asarray(shape.exterior.ctypes)
polygon_parts = [exterior]
hole_separator = np.array([-np.inf, -np.inf])
for ring in shape.interiors:
interior = np.asarray(ring.ctypes)
polygon_parts.append(hole_separator)
polygon_parts.append(interior)
return polygon_parts
def _shapely_to_array_parts(shape):
import shapely.geometry as sg
if isinstance(shape, sg.Polygon):
# Single polygon
return _polygon_to_array_parts(shape)
elif isinstance(shape, sg.MultiPolygon):
shape = list(shape)
polygon_parts = _polygon_to_array_parts(shape[0])
polygon_separator = np.array([np.inf, np.inf])
for polygon in shape[1:]:
polygon_parts.append(polygon_separator)
polygon_parts.extend(_polygon_to_array_parts(polygon))
return polygon_parts
else:
raise ValueError("""
Received invalid value of type {typ}. Must be an instance of
shapely.geometry.Polygon or shapely.geometry.MultiPolygon"""
.format(typ=type(shape).__name__))
Testing on the "countries" data:
df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
Datashader's method:
from_geopandas(df.geometry.array)
(array([ 0, 48, 152, 208, 1854, 2766, 2990, 3098, 3266, 3790, 4034, 4264, 4512, 4586, 4660, 4822, 4938, 4980, 5032, 6308, 6354, 6374, 6556, 6820, 6838, 6860, 7050, 7074, 7414, 7456, 7862, 7982, 8134, 8334, 8438, 8510, 8614, 8728, 8768, 8838, 8878, 9062, 9142, 9194, 9346, 9412, 9430, 9452, 9536, 9610, 9690, 9778, 9866, 10018, 10096, 10146, 10262, 10378, 10500, 10538, 10588, 10680, 10820, 10858, 10912, 10956, 11034, 11158, 11256, 11318, 11332, 11454, 11510, 11668, 11690, 11842, 11868, 11920, 11942, 12040, 12058, 12090, 12152, 12276, 12314, 12358, 12376, 12394, 12454, 12550, 12576, 12610, 12738, 12812, 12952, 13040, 13138, 13176, 13326, 13598, 13670, 13696, 13742, 13874, 14012, 14094, 14164, 14272, 14424, 14478, 14518, 14598, 14688, 14874, 14964, 15038, 15102, 15156, 15244, 15282, 15326, 15364, 15480, 15536, 15646, 15784, 15832, 15918, 15966, 15980, 16014, 16044, 16110, 16212, 16238, 16264, 16346, 16480, 16964, 16984, 17466, 17484, 17662, 17712, 17826, 17866, 17956, 18004, 18236, 18362, 18378, 18414, 18494, 18560, 18630, 18686, 18820, 18886, 18974, 19126, 20462, 20494, 20524, 20650, 20738, 20850, 20968, 20998, 21050, 21106, 21134, 21180, 21216, 21312, 21348, 21390, 21406], dtype=uint64), array([180. , -16.06713266, 179.41350936, ..., 4.17369904, 30.83385242, 3.5091716 ]))
%timeit from_geopandas(df.geometry.array)
46.2 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Using pygeos:
arr = pygeos.from_wkb(geopandas.array.to_wkb(df.geometry.array))
pygeos.get_coordinates(arr)
array([[180. , -16.06713266], [180. , -16.55521657], [179.36414266, -16.80135408], ..., [ 31.88145 , 3.55827 ], [ 31.24556 , 3.7819 ], [ 30.83385242, 3.5091716 ]])
%timeit pygeos.get_coordinates(arr)
154 µs ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
Note: this currently gives a 2D array (not x and y intertwined) and it does not give the start indices, but I think having a version that does those things should not be a order of magnitude slower.