import pandas as pd  # for data loading and manipulation
import requests  # for the request-function to OSRM
import numpy as np  # some direct matrix manipulations
#import numexpr

# Read the OSM data dump of places in Germany.
de = pd.read_table('data/germany-latest.tsv', low_memory=False)
de.shape  # 3 million spatial entities
de.columns  # OSM variables included
de['type'].value_counts()  # counts by type of spatial feature


def match_hint(x):
    """Return True when *x* contains exactly three e's and three d's.

    The count is case-insensitive. Values without a ``lower`` method
    (e.g. the float NaN pandas uses for a missing name) yield False.
    """
    try:
        lowered = x.lower()
    except AttributeError:
        # Not a string-like value, so it cannot match.
        return False
    return lowered.count('e') == lowered.count('d') == 3


# Apply the criteria test-function to each name, and record the matches.
# Bracket assignment is required here: attribute assignment would not
# create a DataFrame column.
de['ismatch'] = de['name'].apply(match_hint)
de['ismatch'].value_counts()  # we have 1483 matches across Germany

de_match = de[de['ismatch']]  # filter out the matched entities
de_match['type'].value_counts()

# It has to be a town of some sort, filter out the rest.
de_towns = de_match[de_match['type'].isin(
    ['village', 'suburb', 'hamlet', 'town', 'neighbourhood', 'city'])]
de_towns['type'].value_counts()
de_towns['type'].shape  # we have 50 places left
de_towns['name']  # possible locations with the right name

# List of (longitude, latitude) pairs. zip() must be materialized into a
# list because the Heusden coordinates are appended below.
coords = list(zip(de_towns['lon'], de_towns['lat']))
labels = de_towns['name'].tolist()  # get a list of place-names
coords.append((5.2205227, 51.023644))  # add the coordinates of Heusden
labels.append('Heusden')  # add Heusden to the list of place names


# Get the pairwise drive time in minutes for a list of coordinates,
# using the OSM-based open source online routing webservice
# http://project-osrm.org/
# (code from python-osrm:
#  https://github.com/ustroetz/python-osrm/blob/master/osrm/core.py#L142 )
def drivetime_table(list_coords, list_ids, output='df',
                    host='http://localhost:5000'):
    """
    Function wrapping OSRM 'table' function in order to get a matrix of
    time distance as a numpy array or as a DataFrame

    Params :

        list_coords: list
            A list of coord as [x, y] (i.e. [longitude, latitude]; the
            query is built as "y,x"), like :
                 list_coords = [[21.3224, 45.2358],
                                [21.3856, 42.0094],
                                [20.9574, 41.5286]] (coords have to be float)
        list_ids: list
            A list of the corresponding unique id, like :
                     list_ids = ['name1', 'name2', 'name3']
            (id can be str, int or float)
        host: str, default 'http://localhost:5000'
            Url and port of the OSRM instance (a trailing slash is
            tolerated and stripped)
        output: str, default 'df'
            The type of matrix to return (DataFrame or numpy array)
                'pandas', 'df' or 'DataFrame' for a DataFrame
                'numpy', 'array' or 'np' for a numpy array

    Output:
        - 'numpy' : a numpy array containing the time in minutes
                    (or NaN when OSRM encounter an error to compute a route)
            or
        - 'pandas' : a labeled DataFrame containing the time matrix in minutes
                    (or NaN when OSRM encounter an error to compute a route)

        -1 is returned for a bad 'output' parameter, a failed request,
        a response that is not JSON, or a table smaller than requested.
        -10 is returned when the response has no 'distance_table' entry.
    """
    wanted = output.lower()
    if wanted in ('numpy', 'array', 'np'):
        as_dataframe = False
    elif wanted in ('pandas', 'dataframe', 'df'):
        as_dataframe = True
    else:
        print('Unknow output parameter')
        return -1

    # Preparing the query: one "lat,lon" location per coordinate. Pairing
    # with list_ids keeps the number of locations limited to the shorter
    # of the two lists.
    locations = '&loc='.join(
        ''.join([str(coord[1]), ',', str(coord[0])])
        for coord, _ in zip(list_coords, list_ids))
    # Strip a trailing slash so the URL does not contain "//table".
    query = ''.join([host.rstrip('/'), '/table?loc=', locations])

    try:  # Querying the OSRM instance
        rep = requests.get(query)
        parsed_json = rep.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers a body that cannot be decoded as JSON.
        print('Error while contacting OSRM instance : \n{}'.format(err))
        return -1

    if 'distance_table' not in parsed_json:
        print('No distance table return by OSRM local instance')
        return -10

    # Preparing the result matrix
    mat = np.array(parsed_json['distance_table'], dtype='float64')
    if len(mat) < len(list_coords):
        print(('The array returned by OSRM is smaller to the size of the '
               'array requested\nOSRM parameter --max-table-size should be'
               ' increased or function osrm.table_OD(...) should be used'))
        return -1

    # Conversion in minutes (the 10*60 divisor implies the raw values are
    # tenths of a second - TODO confirm against the OSRM version in use).
    mat = mat / (10 * 60)
    mat = mat.round(1)
    # Flag the errors with NaN: 3579139.4 is what 2**31 - 1 becomes after
    # the conversion and rounding above.
    mat[mat == 3579139.4] = np.nan

    if as_dataframe:
        return pd.DataFrame(mat, index=list_ids, columns=list_ids,
                            dtype=float)
    return mat


# Get the driving-time in minutes for all the coords.
time_matrix = drivetime_table(coords, labels, output='dataframe',
                              host='http://router.project-osrm.org/')
time_matrix.shape  # 51x51 matrix, for all german cities + Heusden

# Keep only the places within 180 minutes of driving from Heusden, on both
# the rows and the columns of the matrix.
within_reach = time_matrix['Heusden'] < 180
solutions = time_matrix.loc[within_reach, within_reach]
solutions  # we are left with six possibilities