import pandas as pd
import numpy as np
from geopy.distance import great_circle
from geopy.distance import vincenty
# UK bounding box: N/S are the latitude limits, E/W the longitude limits.
N = 55.811741
E = 1.768960
S = 49.871159
W = -6.379880
# 633 points give 633 ** 2 = 400,689 ordered pairs, i.e. a bit more than 400k.
num_coords = 633
# Draw uniformly distributed random points inside the bounding box.
# np.random.uniform already returns a 1-D array, which pandas accepts
# directly as column data, so no list() conversion is needed.
df = pd.DataFrame({
    'lon': np.random.uniform(W, E, num_coords),
    'lat': np.random.uniform(S, N, num_coords),
})
# Materialise the (lat, lon) pairs as a list: on Python 3, zip() returns a
# lazy iterator without a length, which pandas does not accept as a column
# of per-row values. On Python 2 this is equivalent to the bare zip().
df['coords'] = list(zip(df.lat, df.lon))
df.head()
lat | lon | coords | |
---|---|---|---|
0 | 52.239760 | 1.609915 | (52.2397597274, 1.609915459) |
1 | 53.657153 | -3.949987 | (53.6571534399, -3.94998656551) |
2 | 54.724409 | -1.078950 | (54.7244094226, -1.0789501053) |
3 | 51.183825 | 0.963573 | (51.1838249282, 0.963573094895) |
4 | 54.811744 | -1.010206 | (54.8117436496, -1.01020628852) |
# Zero-filled square frame with one row and one column per point in df.
# It only provides the row/column labels for the pairwise computation;
# the zeros themselves are placeholders.
square = pd.DataFrame(
    np.zeros((len(df.index), len(df.index))),
    index=df.index,
    columns=df.index,
)
def get_distance(col):
    """
    Return the distance from every point in ``df`` to one reference point.

    Meant to be used as ``square.apply(get_distance, axis=1)``: pandas then
    calls this function once per row of ``square``, passing that row as a
    Series whose ``name`` is the row's index label. Only that label is
    used; the values held in ``col`` are ignored.

    The label selects the reference ('end') coordinates from ``df``, and
    geopy's ``vincenty()`` is then called as
    ``vincenty(coords, end, ellipsoid='WGS-84')`` for every entry of
    ``df['coords']``.

    Parameters
    ----------
    col : pandas.Series
        A row of ``square``; ``col.name`` must be a label in ``df.index``.

    Returns
    -------
    pandas.Series
        Indexed like ``df``; each element is the object returned by
        ``vincenty()`` for the corresponding point and the reference point.
    """
    # .loc is the label-based replacement for the .ix indexer, which has
    # been removed from pandas.
    end = df.loc[col.name, 'coords']
    # NOTE: newer geopy releases deprecate and then remove vincenty() in
    # favour of geodesic(); this call needs a geopy version that still
    # provides vincenty().
    return df['coords'].apply(vincenty, args=(end,), ellipsoid='WGS-84')
This approach is not efficient: it takes around 15 seconds of wall-clock time on my 3.4 GHz iMac.
There's probably a SciPy pairwise-distance function that does a much better job.
# Call get_distance once per row of ``square`` (axis=1); each call returns
# the distances from every point to that row's point. The result is then
# transposed so that each of those per-point results becomes a column.
distances = square.apply(get_distance, axis=1).transpose()
distances.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 km | 405.50348899 km | 329.084912497 km | 125.690948608 km | 334.809546214 km | 363.977284175 km | 382.757346144 km | 535.936672603 km | 453.455691916 km | 308.493357486 km | ... | 400.087508977 km | 111.975894721 km | 439.43400121 km | 180.24140288 km | 411.642775795 km | 407.386946563 km | 483.234335798 km | 375.739768332 km | 194.674392977 km | 301.45740391 km |
1 | 405.50348899 km | 0.0 km | 221.861288259 km | 432.830979123 km | 230.757869772 km | 158.095284662 km | 110.043557538 km | 147.351736592 km | 148.457521906 km | 267.013330437 km | ... | 144.457624755 km | 454.323538103 km | 312.934572811 km | 277.655527769 km | 82.5152289072 km | 99.3284506038 km | 109.490313755 km | 230.269722838 km | 286.774422855 km | 194.711412469 km |
2 | 329.084912497 km | 221.861288259 km | 0.0 km | 417.196927799 km | 10.6814527 km | 70.863645424 km | 298.323147682 km | 273.910722975 km | 157.066357114 km | 49.2445247693 km | ... | 335.812017502 km | 423.039491047 km | 483.025940498 km | 149.04116848 km | 291.780083538 km | 144.491807851 km | 222.095480532 km | 391.732838315 km | 138.26555647 km | 317.903302763 km |
3 | 125.690948608 km | 432.830979123 km | 417.196927799 km | 0.0 km | 424.885071457 km | 435.546234701 km | 377.839985968 km | 576.231623356 km | 515.485469459 km | 407.42856463 km | ... | 383.454043729 km | 33.7054490333 km | 372.798143239 km | 277.081424576 km | 414.272250674 km | 462.044510708 km | 527.921396963 km | 331.220487884 km | 296.642676261 km | 277.089868144 km |
4 | 334.809546214 km | 230.757869772 km | 10.6814527 km | 424.885071457 km | 0.0 km | 77.361013525 km | 308.571537322 km | 278.28526467 km | 159.934984822 km | 44.921193572 km | ... | 346.110366859 km | 430.122218324 km | 493.684280642 km | 154.571809629 km | 301.534166914 km | 150.960361859 km | 227.157332278 km | 402.391764394 km | 142.486100572 km | 328.572031833 km |
5 rows × 633 columns
def units(input_instance):
    """Return the value of the ``meters`` attribute of ``input_instance``."""
    length_in_meters = input_instance.meters
    return length_in_meters
# Convert every cell of the distance matrix with units(), working through
# the frame one column at a time.
distances_meters = distances.apply(lambda column: column.map(units))
distances_meters.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.000000 | 405503.488990 | 329084.912497 | 125690.948608 | 334809.546214 | 363977.284175 | 382757.346144 | 535936.672603 | 453455.691916 | 308493.357486 | ... | 400087.508977 | 111975.894721 | 439434.001210 | 180241.402880 | 411642.775795 | 407386.946563 | 483234.335798 | 375739.768332 | 194674.392977 | 301457.403910 |
1 | 405503.488990 | 0.000000 | 221861.288259 | 432830.979123 | 230757.869772 | 158095.284662 | 110043.557538 | 147351.736592 | 148457.521906 | 267013.330437 | ... | 144457.624755 | 454323.538103 | 312934.572811 | 277655.527769 | 82515.228907 | 99328.450604 | 109490.313755 | 230269.722838 | 286774.422855 | 194711.412469 |
2 | 329084.912497 | 221861.288259 | 0.000000 | 417196.927799 | 10681.452700 | 70863.645424 | 298323.147682 | 273910.722975 | 157066.357114 | 49244.524769 | ... | 335812.017502 | 423039.491047 | 483025.940498 | 149041.168480 | 291780.083538 | 144491.807851 | 222095.480532 | 391732.838315 | 138265.556470 | 317903.302763 |
3 | 125690.948608 | 432830.979123 | 417196.927799 | 0.000000 | 424885.071457 | 435546.234701 | 377839.985968 | 576231.623356 | 515485.469459 | 407428.564630 | ... | 383454.043729 | 33705.449033 | 372798.143239 | 277081.424576 | 414272.250674 | 462044.510708 | 527921.396963 | 331220.487884 | 296642.676261 | 277089.868144 |
4 | 334809.546214 | 230757.869772 | 10681.452700 | 424885.071457 | 0.000000 | 77361.013525 | 308571.537322 | 278285.264670 | 159934.984822 | 44921.193572 | ... | 346110.366859 | 430122.218324 | 493684.280642 | 154571.809629 | 301534.166914 | 150960.361859 | 227157.332278 | 402391.764394 | 142486.100572 | 328572.031833 |
5 rows × 633 columns
# Look up a single value by label: row 1, column 3.
distances_meters.at[1, 3]
432830.97912272112