#!/usr/bin/env python
# coding: utf-8

# # Data Observatory in cartoframes
#
# The [Data Observatory](https://carto.com/data-observatory/) can be accessed
# through CARTOframes. This is a basic demonstration of how one would pull
# down new measures for building a feature set for training a model.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import cartoframes
from cartoframes import BaseMap, Layer, QueryLayer, styling
import pandas as pd

# Enter your username and api key below
cc = cartoframes.CartoContext(
    base_url='https://{username}.carto.com/'.format(username=''),
    api_key='')


# ## Getting Mexico City Metro station coordinates
#
# Use pandas to download an Excel spreadsheet into a dataframe.

# In[2]:


# Metro stations from here:
# https://github.com/josecarlosgonz/mexicoCityMetro/blob/master/coordsMetro.xlsx
df = pd.read_excel('https://github.com/josecarlosgonz/mexicoCityMetro/blob/master/coordsMetro.xlsx?raw=true')
df.head()


# Send to CARTO, being sure to specify the to-be-normalized column names
# `latitude.1` -> `latitude_1`, etc.

# In[3]:


orig_table = 'coordsmetro_demo'
cc.write(df, orig_table,
         lnglat=('longitude_1', 'latitude_1'),
         overwrite=True)


# ## See the data by `linea`
#
# _Note: the basemap labels are on the bottom by default._

# In[4]:


cc.map(layers=Layer(orig_table,
                    color={'column': 'linea',
                           'scheme': styling.bold(10)}))


# See a static version of the map above

# In[5]:


cc.map(layers=Layer(orig_table,
                    color={'column': 'linea',
                           'scheme': styling.bold(10)}),
       interactive=False)


# ## Data Observatory measures in the Mexico City area
#
# Let's get education-related Data Observatory measures around the metro
# stops.

# In[6]:


meta = cc.data_discovery(region=orig_table,
                         keywords='education')
meta.head()


# In[7]:


# See how many measures are possible
meta.shape


# In[8]:


# Look at the geometry levels available
meta.groupby('geom_id')['geom_id'].count()


# Narrow down the problem to only have `municipio`-level measures.

# In[9]:


# select only the municipio level data
meta = meta[meta['geom_id'] == 'mx.inegi.municipio']


# This takes it down to only 20 measures.

# In[10]:


meta.shape


# Take a look at the measures we have

# In[11]:


meta['numer_name'].values


# In[13]:


# Get some data.
# The boolean filter above keeps the original row labels, so take the first
# five measures by position (`iloc`) rather than by label (`loc[0:4]`), which
# would silently return fewer rows if labels 0-4 were filtered out.
data = cc.data(orig_table, meta.iloc[:5])
data.head()


# In[15]:


new_table = 'mexico_metro_augmented'
cc.write(data, new_table, overwrite=True)


# In[19]:


cc.map(layers=[BaseMap('dark'),
               Layer(new_table,
                     color={'column': 'female_employed_incomplete_secondary_education_rate_2010',
                            'scheme': styling.sunset(7)})])