#!/usr/bin/env python # coding: utf-8 # # Convert a pandas dataframe to geojson for web-mapping # # Author: Geoff Boeing # # Original: [pandas-to-geojson](https://github.com/gboeing/urban-data-science/blob/dc86c9c89b73f87f97301883d7456f1f814589f5/17-Leaflet-Web-Mapping/pandas-to-geojson.ipynb) # In[1]: import pandas as pd, requests, json # First download data from the city of Berkeley's API. You can use Socrata's $limit parameter to specify how many rows to grab (otherwise the default is 1,000 rows of data): https://dev.socrata.com/docs/paging.html # # Example request: https://data.cityofberkeley.info/resource/k489-uv4i.json?$limit=5 # In[2]: # API endpoint for city of Berkeley's 311 calls endpoint_url = 'https://data.cityofberkeley.info/resource/bscu-qpbu.json?$limit=20&$where=latitude%20%3C%3E%20%22%22' # In[3]: # fetch the URL and load the data response = requests.get(endpoint_url) data = response.json() # Next, turn the json data into a dataframe and clean it up a bit: drop unnecessary columns and any rows that lack lat-long data. We want to make our json file as small as possible (prefer under 5 mb) so that it can be loaded over the Internet to anyone viewing your map, without taking forever to download a huge file. # In[4]: # turn the json data into a dataframe and see how many rows and what columns we have df = pd.DataFrame(data) print('We have {} rows'.format(len(df))) str(df.columns.tolist()) # In[5]: # convert lat-long to floats and change address from ALL CAPS to regular capitalization df['latitude'] = df['latitude'].astype(float) df['longitude'] = df['longitude'].astype(float) df['street_address'] = df['street_address'].str.title() # In[6]: # we don't need all those columns - only keep useful ones cols = ['request_detail', 'request_subcategory', 'latitude', 'longitude', 'street_address', 'case_status'] df_subset = df[cols] # In[7]: # drop any rows that lack lat/long data df_geo = df_subset.dropna(subset=['latitude', 'longitude'], axis=0, inplace=False) print('We have {} geotagged rows'.format(len(df_geo))) df_geo.tail() # In[8]: # what is the distribution of issue types? df_geo['request_subcategory'].value_counts() # Finally, convert each row in the dataframe to a geojson-formatted feature and save the result as a file. The format is pretty simple and you can see it here: http://geojson.org/ # In[9]: def df_to_geojson(df, properties, lat='latitude', lon='longitude'): # create a new python dict to contain our geojson data, using geojson format geojson = {'type':'FeatureCollection', 'features':[]} # loop through each row in the dataframe and convert each row to geojson format for _, row in df.iterrows(): # create a feature template to fill in feature = {'type':'Feature', 'properties':{}, 'geometry':{'type':'Point', 'coordinates':[]}} # fill in the coordinates feature['geometry']['coordinates'] = [row[lon],row[lat]] # for each column, get the value and add it as a new feature property for prop in properties: feature['properties'][prop] = row[prop] # add this feature (aka, converted dataframe row) to the list of features inside our dict geojson['features'].append(feature) return geojson # In[14]: cols = [ 'street_address', 'request_detail', 'request_subcategory', 'case_status' ] geojson = df_to_geojson(df_geo, cols) # In [nteract](https://github.com/nteract/nteract), we can display geojson directly with the built-in leaflet renderer. # In[13]: import IPython IPython.display.display({'application/geo+json': geojson}, raw=True) # _Known temporary [issue](https://github.com/nteract/nteract/issues/2034): leaflet fails to render_