#!/usr/bin/env python # coding: utf-8 # # [matta](https://github.com/carnby/matta) - view and scaffold d3.js visualizations in IPython notebooks # # ## Let's Make a Map Too (and a Cartogram!) # # Inspired by [Mike Bostock's _Let's Make a Map_](http://bost.ocks.org/mike/map/), we want to make a map too using matta. # We will display the [communes of Santiago, Chile](https://en.wikipedia.org/wiki/Santiago#Political_divisions). To do that we will perform the following steps: # # 1. Download the administrative borders of Chile in a shapefile from the [Chilean Library of Congress](http://siit2.bcn.cl/mapas_vectoriales/index_html/). # 2. Use ogr2ogr to filter and clip the borders of the city of Santiago, as well as converting the result to GeoJSON. # 3. Convert the GeoJSON file to TopoJSON. # 4. Display the TopoJSON file using matta in the IPython notebook. # 5. Download Human Development Index from Wikipedia and make a choropleth/symbol map using matta. # In[2]: from __future__ import print_function, unicode_literals # In[1]: import matta import json import unicodedata # we do this to load the required libraries when viewing on NBViewer matta.init_javascript(path='https://rawgit.com/carnby/matta/master/matta/libs') # ### Download Shapefiles # # Note We delete data to start from 0 # In[4]: get_ipython().system('rm -fr data') get_ipython().system('mkdir data') get_ipython().system('wget http://siit2.bcn.cl/obtienearchivo?id=repositorio/10221/10396/1/division_comunal.zip -O data/division_comunal.zip') # In[5]: get_ipython().system('unzip data/division_comunal.zip -d data/') # ### Convert to GeoJSON # # You can use `ogrinfo` to see the structure of the source shapefile. # In[6]: get_ipython().system("ogrinfo data/division_comunal.shp 'division_comunal' | head -n 30") # Now we use ogr2ogr to convert the shapefile into GeoJSON. We will build a file for Santiago (its capital) only. # # Notes: # # * After some manual inspection, we know that NOM_PROV contains the name of the parent administrative divisions of the city. We use a where clause to filter those. # * We delete `santiago-comunas-geo.json` in case it exists (that is, when we re-run the notebook :) ). # * We use the `-clipdst` option to specify a bounding box obtained in [this site](http://boundingbox.klokantech.com/). We need it because some populated locations are much bigger in administrative terms than in population terms. # * We also use the `-t_srs EPSG:4326` option to convert the data coordinates to (longitude,latitude) pairs. # In[7]: get_ipython().system('ogr2ogr -where "NOM_PROV IN (\'Santiago\', \'Maipo\', \'Cordillera\', \'Chacabuco\', \'Talagante\')" -f GeoJSON -clipdst -70.828155 -33.635036 -70.452573 -33.302953 -t_srs EPSG:4326 data/santiago-comunas-geo.json data/division_comunal.shp') # ### From GeoJSON to TopoJSON # In[8]: get_ipython().system('topojson -p --id-property NOM_COM -s 0 -o data/santiago-comunas-topo.json data/santiago-comunas-geo.json') # In[9]: get_ipython().system('ls -lh data/*.json') # In[10]: def strip_accents(s): return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn') with open('data/santiago-comunas-topo.json', 'r') as f: stgo = json.load(f) for g in stgo['objects']['santiago-comunas-geo']['geometries']: g['id'] = strip_accents(g['id'].title()) g['properties']['id'] = g['id'] # Here is an example feature: # In[11]: stgo['objects']['santiago-comunas-geo']['geometries'][0] # ## Display TopoJSON using matta # In[12]: matta.cartography(geometry=stgo, leaflet=False, label='NOM_COM', fill_color={'value': 'id', 'palette': str('husl'), 'scale': 'ordinal', 'n_colors': 40}) # ## Choroplet and Symbol Map # # Now that we can load a map and giving some colors to areas, let's try to visualize some data. The easiest way to do so would be to get a Wikipedia page with data for Santiago. From it, we will get the *Human Development Index* (IDH in Spanish) score for each municipality of the city. # In[13]: import pandas as pd df = pd.read_html('https://es.wikipedia.org/wiki/Anexo:Comunas_de_Santiago_de_Chile', attrs={'class': 'sortable'}, header=0)[0] df.head() # Data is not clean due to locale settings. Fortunately, we just want the HDI column, which should be easy to convert to a meaningful float. Let's fix the HDI and Population columns. # In[14]: df['Comuna'] = [strip_accents(c).replace('?', '').title() for c in df['Comuna']] df['HDI'] = [float(c.split()[0].replace(',', '.')) for c in df['IDH (2003)?']] del df['IDH (2003)?'] df.head() # In[15]: df['Population'] = df['Población (2002)?'] * 1000 df = df.loc[:,('Comuna', 'HDI', 'Population')] df.head() # Much better! # In[16]: df.describe() # Now let's visualize HDI for each municipality. # In[17]: matta.cartography(geometry=stgo, label='NOM_COM', width=700, area_dataframe=df, area_feature_name='Comuna', area_color={'value': 'HDI', 'scale': 'threshold', 'palette': 'coolwarm', 'n_colors': 7, 'legend': True}, leaflet=False) # What happened there? We bound a DataFrame (`area_dataframe`) to the geometry by using the `area_feature_name` column in the DataFrame. Using this, our code was able to match a feature from the TopoJSON structure with a row from the DataFrame. Note that some areas are not drawn - they were not available in the DataFrame. # # We can also visualize this information using a [cartogram](https://en.wikipedia.org/wiki/Cartogram). According to Wikipedia: # # > A cartogram is a map in which some thematic mapping variable – such as travel time, population, or Gross National Product – is substituted for land area or distance. The geometry or space of the map is distorted in order to convey the information of this alternate variable. # # For instance, we observe that the areas that have a high HDI are, indeed, big. But how much people inhabit them? # In[38]: matta.cartogram(geometry=stgo, width=700, area_dataframe=df, area_feature_name='Comuna', area_opacity=1.0, area_color={'value': 'HDI', 'scale': 'threshold', 'palette': 'coolwarm', 'n_colors': 7}, area_value='Population', na_value=df['Population'].min() * 0.75) # As we can see, the shape of each area is different now! The differences are not very dramatic, but this is just an example. # In[ ]: