# default_exp osm_data
Collecting and exploring different buildings from OpenStreetMap
#hide
from nbdev.showdoc import *
import pandas as pd
import geopandas as gpd
import osmnx as ox
import matplotlib.pyplot as plt
import contextily as cx
# Lookup table of OSM `shop` tag values and the group each value is assigned to
# (its columns are Key, Value, Comment, Group Name and Group).
shop_types = pd.read_csv("../data/osm_shop_types.csv")
# Notebook display: (rows, columns) of the lookup table.
shop_types.shape
(169, 5)
shop_types.head()
Key | Value | Comment | Group Name | Group | |
---|---|---|---|---|---|
0 | shop | alcohol | Shop selling alcohol to take away | Food, beverages | Food |
1 | shop | bakery | Shop focused on selling bread | Food, beverages | Food |
2 | shop | beverages | Shop focused on selling alcoholic and non-alco... | Food, beverages | Food |
3 | shop | brewing_supplies | Shop focused on selling supplies for home brew... | Food, beverages | Food |
4 | shop | butcher | Shop focused on selling meat | Food, beverages | Food |
# Plot shops from a place
def plot_shops(place):
    """Draw the buildings, amenities and shops of `place` on one basemap.

    Three OSM layers are fetched for `place` with `ox.geometries_from_place`
    (everything tagged `building`, `amenity` and `shop`) and drawn on a
    single 20x20 figure: buildings in black, amenities in blue, shops in
    red. A Stamen TonerLite basemap is added in the CRS of the buildings
    layer, the axes are hidden, and the figure is shown.

    Parameters
    ----------
    place : the place query handed to `ox.geometries_from_place`.

    Returns
    -------
    None. The figure is displayed with `plt.show()`.
    """
    # Fetch the layers in the same order they are drawn: bottom to top.
    buildings = ox.geometries_from_place(place, {'building': True})
    amenities = ox.geometries_from_place(place, {'amenity': True})
    shop_layer = ox.geometries_from_place(place, {"shop": True})

    # Only the axes are needed; the figure handle is not used afterwards.
    _, axis = plt.subplots(1, figsize=(20, 20))
    buildings.plot(ax=axis, color='black', marker=1)
    amenities.plot(ax=axis, color='b', marker=1, alpha=0.7)
    shop_layer.plot(ax=axis, color='r', marker=1, alpha=0.8)

    # The basemap tiles are reprojected to whatever CRS the buildings layer uses.
    cx.add_basemap(
        axis,
        crs=buildings.crs.to_string(),
        source=cx.providers.Stamen.TonerLite,
    )
    axis.set_axis_off()
    plt.show()
# Query OSM features for one municipality and report the size of the result.
place = "City of Monash, Victoria, Australia"
# NOTE(review): osmnx documents tag results as a union across keys, and a
# string value as one exact tag value, so "opening_hours": "*" presumably
# does not act as a wildcard filter on the shops. Confirm the intended query.
tags = {"shop":True, "opening_hours":"*"}
shops = ox.geometries_from_place(place, tags)
# Notebook display: (rows, columns) of the returned frame.
shops.shape
(590, 69)
shops.shop.value_counts().head(50)
convenience 59 car_repair 41 hairdresser 39 supermarket 36 bakery 31 car 24 alcohol 24 beauty 22 butcher 18 clothes 17 travel_agency 13 massage 12 computer 11 mall 11 yes 11 greengrocer 10 doityourself 9 department_store 9 optician 9 florist 7 electronics 7 laundry 7 funeral_directors 6 gift 6 variety_store 6 newsagent 6 furniture 6 sports 6 tobacco 5 mobile_phone 4 car_parts 4 deli 4 second_hand 4 jewelry 4 pet 3 stationery 3 dry_cleaning 3 music 3 pawnbroker 3 shoes 3 tattoo 3 houseware 3 garden_centre 3 lottery 3 pastry 3 copyshop 2 books 2 wholesale 2 tailor 2 seafood 2 Name: shop, dtype: int64
# Draw the buildings/amenities/shops map for each of the two places in turn.
for plc in (
    "City of Monash, Victoria, Australia",
    "Melbourne District, Melbourne, City of Melbourne, Victoria, Australia",
):
    plot_shops(plc)
/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
# Repeat the shop query for the whole state; this rebinds `place`, `tags`
# and `shops` used by the cells that follow.
place = "Victoria, Australia"
# NOTE(review): osmnx documents tag results as a union across keys, and a
# string value as one exact tag value, so "opening_hours": "*" presumably
# does not act as a wildcard filter on the shops. Confirm the intended query.
tags = {"shop":True, "opening_hours":"*"}
shops = ox.geometries_from_place(place, tags)
# Notebook display: (rows, columns) of the returned frame.
shops.shape
(14794, 369)
shops.shape, type(shops)
((14794, 374), geopandas.geodataframe.GeoDataFrame)
# Subset of columns to keep from `shops` for inspection and export.
# 'Key', 'Value', 'Group Name' and 'Group' are columns of the shop_types
# lookup table, so `shops` must already have been merged with it; that merge
# is not visible in this section -- TODO confirm where it happens.
# NOTE(review): 'addr:state', 'opening_date', 'lat' and 'long' do not appear
# in the output recorded for this cell, so the list may have been edited after
# it last ran. Verify that every name exists in `shops` before selecting.
columns = ['unique_id',
'osmid',
'element_type',
'addr:city',
'addr:housenumber',
'addr:postcode',
'addr:state',
'addr:street',
'postal_code',
'name',
'opening_hours','opening_date',
'operator',
'phone',
'shop',
'source',
'building:levels',
'building',
'geometry', 'lat', 'long',
'brand', 'landuse','state',
'Key', 'Value', 'Group Name', 'Group'
]
# Notebook display: first five rows, transposed so each column reads as a row.
shops[columns].head().T
/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
unique_id | node/1466116159 | node/4829722523 | node/5240167118 | way/257720343 | way/257723588 |
osmid | 1466116159 | 4829722523 | 5240167118 | 257720343 | 257723588 |
element_type | node | node | node | way | way |
addr:city | Yanakie | NaN | NaN | NaN | NaN |
addr:housenumber | 3640 | NaN | NaN | NaN | NaN |
addr:postcode | 3960 | NaN | NaN | NaN | NaN |
addr:street | Meeniyan - Promontory Road | NaN | NaN | NaN | NaN |
postal_code | NaN | NaN | NaN | NaN | NaN |
name | Yanakie General Store | NaN | NaN | NaN | NaN |
opening_hours | 08:00-18:00 | NaN | NaN | NaN | NaN |
operator | Foodland Express | NaN | NaN | NaN | NaN |
phone | +61 3 5687 1200 | NaN | NaN | NaN | NaN |
shop | supermarket | supermarket | laundry | general | ticket |
source | local knowledge | NaN | NaN | NaN | NaN |
building:levels | NaN | NaN | NaN | 1 | NaN |
building | NaN | NaN | NaN | NaN | yes |
geometry | POINT (146.2076606 -38.8125072) | POINT (146.3209425 -39.0307625) | POINT (146.3178541 -39.0292552) | POLYGON ((146.3210142 -39.0310104, 146.3210156... | POLYGON ((146.249307 -38.858471, 146.2493843 -... |
brand | NaN | NaN | NaN | NaN | NaN |
landuse | NaN | NaN | NaN | NaN | NaN |
state | NaN | NaN | NaN | NaN | NaN |
Key | shop | shop | shop | shop | shop |
Value | supermarket | supermarket | laundry | general | ticket |
Group Name | General store, department store, mall | General store, department store, mall | Others | General store, department store, mall | Stationery, gifts, books, newspapers |
Group | Mall | Mall | Others | Mall | Stationery |
shops.Group.value_counts()
/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
Food 3742 Mall 1893 Vehicles 1885 Beauty 1231 Clothes 1190 Others 1008 Housewares 779 Stationery 605 Furniture 485 Health 459 Discount store 290 Electronics 273 Arts 253 Name: Group, dtype: int64
# Write the selected columns of the Victoria shops to a GeoJSON file.
# NOTE(review): the Victoria, Australia export is written under a `toronto/`
# data directory -- confirm that location is intended.
shops[columns].to_file(
    "../data/toronto/raw/all_shops_victoria_australia.geojson",
    driver='GeoJSON',
)
# Notebook display: the 60 most common shop types among Point geometries only.
shops[shops.geometry.geom_type == 'Point'].shop.value_counts().head(60)
supermarket 917 convenience 801 hairdresser 661 clothes 649 bakery 547 alcohol 518 car_repair 495 car 268 beauty 259 butcher 230 furniture 186 department_store 182 laundry 177 bicycle 166 massage 157 greengrocer 151 gift 147 yes 142 car_parts 139 travel_agency 129 florist 128 books 127 newsagent 125 jewelry 118 wine 116 variety_store 105 doityourself 103 electronics 99 mobile_phone 96 optician 94 deli 90 outdoor 85 shoes 83 pet 79 computer 73 charity 72 art 71 houseware 68 dry_cleaning 67 sports 67 hardware 65 vacant 65 tyres 55 chemist 53 interior_decoration 51 seafood 51 garden_centre 49 tobacco 49 toys 45 stationery 41 lottery 41 copyshop 41 beverages 40 tattoo 40 funeral_directors 38 confectionery 35 antiques 35 pastry 33 second_hand 32 music 30 Name: shop, dtype: int64
plc = "Toronto, Golden Horseshoe, Ontario, Canada"
plot_shops(plc)
/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above. and should_run_async(code)
# The Greater Toronto and Hamilton Area (GTHA): region name -> numeric code.
GTHA = {
    "Toronto": 1,
    "Durham Region": 2,
    "York Region": 3,
    "Peel Region": 4,
    "Halton Region": 5,
    "Hamilton": 6,
}
GTHA
# Plot every GTHA region, printing its name and code ahead of each map.
for region, code in GTHA.items():
    print(region, code)
    plc = f"{region}, Golden Horseshoe, Ontario, Canada"
    plot_shops(plc)
# Run the shop query for the whole province; the result is kept separately
# in `can_shops`.
place = "Ontario, Canada"
# NOTE(review): osmnx documents tag results as a union across keys, and a
# string value as one exact tag value, so "opening_hours": "*" presumably
# does not act as a wildcard filter on the shops. Confirm the intended query.
tags = {"shop":True, "opening_hours":"*"}
can_shops = ox.geometries_from_place(place, tags)
# Notebook display: (rows, columns) of the returned frame.
can_shops.shape
(48557, 558)
# Columns kept in the exported Ontario file. The last four ('Key', 'Value',
# 'Group Name', 'Group') are columns of the shop_types lookup table, so they
# only exist on `can_shops` once it has been merged with that table.
columns = [
    'unique_id',
    'osmid',
    'element_type',
    'addr:city',
    'addr:housenumber',
    'addr:postcode',
    'addr:street',
    'postal_code',
    'name',
    'opening_hours',
    'operator',
    'phone',
    'shop',
    'source',
    'building:levels',
    'building',
    'geometry',
    'brand',
    'landuse',
    'state',
    'Key',
    'Value',
    'Group Name',
    'Group',
]
# Attach the shop group to every feature BEFORE selecting `columns`: the
# lookup-table columns are presumably absent from the raw query result, and
# selecting a missing column raises a KeyError. A left join keeps features
# whose `shop` value has no entry in the lookup table.
can_shops = can_shops.merge(shop_types, how="left", left_on="shop", right_on="Value")
# Save to a GeoJSON file
can_shops[columns].to_file("../data/toronto/raw/all_shops_ontario_canada.geojson", driver='GeoJSON')
# Notebook display: number of features per shop group.
can_shops.Group.value_counts()
Food 7592 Clothes 7390 Others 5996 Beauty 5725 Vehicles 5205 Mall 2699 Health 2322 Housewares 2194 Electronics 2178 Furniture 1905 Stationery 1470 Discount store 1271 Arts 1138 Name: Group, dtype: int64
shops.Group.value_counts()
Food 3742 Mall 1893 Vehicles 1885 Beauty 1231 Clothes 1190 Others 1008 Housewares 779 Stationery 605 Furniture 485 Health 459 Discount store 290 Electronics 273 Arts 253 Name: Group, dtype: int64