CARTO frames workshop

Full details at the documentation page. I'm using the master branch installed using

pip install https://github.com/CartoDB/cartoframes/archive/master.zip
In [1]:
import cartoframes
from cartoframes.credentials import set_credentials
from cartoframes.credentials import credentials
import pandas as pd
import os

Load the credentials

In [2]:
try:
    credentials()
    print('Getting the credentials from a previous session')
except Exception as e:
    print('Getting the credentials from your environment or here')
    BASEURL = os.environ.get('CARTO_API_URL','https://jsanz.carto.com') # <-- replace with your username or set up the envvar
    APIKEY = os.environ.get('CARTO_API_KEY',False) # <-- replace False with your CARTO API key or set up the envvar
    if BASEURL and APIKEY:
        set_credentials(base_url=BASEURL, api_key=APIKEY, overwrite=True)
    else:
        print('Set up your environment!')

if credentials():
    cc = cartoframes.CartoContext()
Getting the credentials from a previous session

Load the typical Populated Places dataset from CARTO

You can import this dataset from the Data Library

In [3]:
df = cc.read('populated_places')
df.head()
Out[3]:
_feature_count adm0_a3 adm0cap adm0name adm1name admin1_cod capalt capin changed checkme ... sov0name sov_a3 the_geom the_geom_webmercator timezone un_adm0 un_fid un_lat un_long worldcity
cartodb_id
2338 1 ZWE 0 Zimbabwe Midlands 0 0 None 0 0 ... Zimbabwe ZWE 0101000020E61000009083EF6FCBCC3D401F917ACCFAED... 0101000020110F0000F2266441234F4941E42EFE14AD60... Africa/Harare None 0 0.0 0.0 0
2339 1 ZWE 0 Zimbabwe Matabeleland South 0 0 None 4 0 ... Zimbabwe ZWE 0101000020E6100000B058CD4DEBD13B40A07C9217C37A... 0101000020110F0000F727F4EEA5A047419A347A28A3C6... Africa/Harare None 0 0.0 0.0 0
2340 1 ZWE 0 Zimbabwe Matabeleland South 0 0 None 0 0 ... Zimbabwe ZWE 0101000020E6100000B0B0E83570FD3D40A0872F4EA935... 0101000020110F0000B85E55667378494135E82B53835A... Africa/Harare None 0 0.0 0.0 0
2341 1 ZWE 0 Zimbabwe Matabeleland South 7 0 None 0 0 ... Zimbabwe ZWE 0101000020E61000007F20C3768F023D409096DD958AF0... 0101000020110F0000C6B95D4D61A3484146B100DE9331... Africa/Harare None 0 0.0 0.0 0
2342 1 ZWE 0 Zimbabwe Masvingo 0 0 None 0 0 ... Zimbabwe ZWE 0101000020E6100000A106041EF7A83F4030047E69B10C... 0101000020110F00002AE924B18CE34A41619B55DA2F4B... Africa/Harare None 0 0.0 0.0 0

5 rows × 96 columns

It's a Pandas data frame

You can get the featurecla field counts

In [4]:
df.groupby('featurecla').featurecla.count()
Out[4]:
featurecla
Admin-0 capital            202
Admin-0 capital alt         13
Admin-0 region capital      28
Admin-1 capital           2057
Admin-1 region capital      71
Historic place               6
Meteorological Station       1
Populated place           4925
Scientific station          40
Name: featurecla, dtype: int64

Run SQL queries

In [5]:
cc.query('''
SELECT featurecla,count(*) as counts
FROM populated_places
GROUP BY featurecla
''')
Out[5]:
counts featurecla
0 1 Meteorological Station
1 28 Admin-0 region capital
2 4925 Populated place
3 2057 Admin-1 capital
4 202 Admin-0 capital
5 71 Admin-1 region capital
6 6 Historic place
7 13 Admin-0 capital alt
8 40 Scientific station

Draw graphics using Seaborn

In [6]:
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
In [7]:
f, ax = plt.subplots(figsize=(16, 6))
sns.boxplot(x="featurecla", y="pop_min", data=df);

Render your tables from CARTO

In [8]:
from cartoframes import Layer, styling
l = Layer(
    'populated_places', 
    color={'column': 'featurecla','scheme': styling.prism(9)},
    size ={'column': 'pop_max','bin_method':'quantiles','bins' : 4, 'min': 3, 'max':10}
)
cc.map(layers=l, interactive=False)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe97a756470>

Render queries from CARTO

In [13]:
from cartoframes import QueryLayer
l = QueryLayer(
    '''
    WITH capitals as (
      select * 
        from populated_places 
       where featurecla like 'Admin-0 capital'
    )
    select pp.cartodb_id,
           pp.pop_max,
           ST_MakeLine(c.the_geom,pp.the_geom) as the_geom,
           ST_MakeLine(c.the_geom_webmercator,pp.the_geom_webmercator) as the_geom_webmercator
    from populated_places pp join capitals c on pp.adm0_a3 = c.adm0_a3
    where c.adm0_a3 = 'ESP'
    ''', 
    color = {'column':'pop_max','scheme':styling.sunset(bins=3)},
    size ={'column': 'pop_max','bin_method':'quantiles','bins' : 4, 'min': 3, 'max':10}
)
cc.map(layers=l, interactive=False,zoom=4, lat=36.19, lng=-6.79)
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe978e326a0>
In [12]:
l2 = QueryLayer(
    '''
    select *
    from populated_places
    where adm0_a3 = 'ESP'
    ''', 
    color = {'column':'pop_max','scheme':styling.sunset(bins=3)},
    size =  {'column':'pop_max','bin_method':'quantiles','bins' : 4, 'min': 3, 'max':7}
)
cc.map(layers=[l,l2], interactive=False,zoom=4, lat=36.19, lng=-6.79)
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe978e5d470>

Crazy queries

You can render fancy queries, more details here

In [16]:
query = '''
with -- first data
data as (
  SELECT * 
  FROM jsanz.ne_10m_populated_places_simple_7 
  WHERE 
(megacity >= 0.5 AND megacity <= 1)  AND featurecla IN ('Admin-0 capital','Admin-1 region capital','Admin-0 region capital','Admin-0 capital alt')
), -- from dubai
origin as (
  select *
  from jsanz.ne_10m_populated_places_simple_7
  where cartodb_id = 7263
), -- cities closer to 14000 Km
dests as (
  select d.*,
  ST_Distance(
    o.the_geom::geography,
    d.the_geom::geography
  )::int distance
  from data d, origin o
  where 
    ST_DWithin( o.the_geom::geography, d.the_geom::geography, 14000000 ) 
),
geoms as (
    select 
    dests.cartodb_id,  dests.name, dests.adm0name, dests.distance,
    st_transform(
      st_segmentize(
        st_makeline(
          origin.the_geom,
          dests.the_geom
        )::geography,
        10000
       )::geometry,
      3857) the_geom_webmercator
    from origin,dests
)
select *, st_transform(the_geom_webmercator,4326) as the_geom from geoms
'''
cc.map(QueryLayer(query), interactive=False, zoom=2, lat=19.9, lng=29.5)
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe97a6cb128>

Create a new table on CARTO

In [17]:
df_spain = df[(df['adm0_a3'] == 'ESP')]
df_spain['name'].head()
Out[17]:
cartodb_id
2361                     Lorca
2362                 Cartagena
2363                    Oviedo
2364    Santiago de Compostela
2365                   Badajoz
Name: name, dtype: object
In [18]:
cc.write(df_spain, 'places_spain', overwrite=True)
Table successfully written to CARTO: https://jsanz.carto.com/dataset/places_spain
In [19]:
cc.query('SELECT DISTINCT adm0_a3 FROM places_spain')
Out[19]:
adm0_a3
0 ESP

Modify schema and data

Drop a column

In [20]:
df_spain = df[(df['adm0_a3'] == 'ESP')]
df_spain = df_spain.drop('adm0_a3', 1)
cc.write(df_spain, 'places_spain', overwrite=True)
Table successfully written to CARTO: https://jsanz.carto.com/dataset/places_spain
In [21]:
try:
    cc.query('SELECT DISTINCT adm0_a3 FROM places_spain')
except Exception as e:
    print(e)
['column "adm0_a3" does not exist']

Add València as an alternate name for the city of Valencia

In [22]:
vlc_id =  df_spain[df.apply(lambda x: x['name'] == 'Valencia', axis=1)].index.values[0]
df_spain = df_spain.set_value(vlc_id,'cityalt','València')
In [23]:
# THE FUTURE
# cc.sync(df_spain,'places_spain')

# THE PRESENT
cc.write(df_spain, 'places_spain', overwrite=True)
Table successfully written to CARTO: https://jsanz.carto.com/dataset/places_spain
In [24]:
cc.query('''SELECT name,cityalt from places_spain WHERE cartodb_id = {}'''.format(vlc_id))
Out[24]:
cityalt name
0 València Valencia

Delete the table

In [25]:
cc.delete('places_spain')

But there's more

Take a look into the documentation for other functions related with CARTO Data Observatory. All these features are is still under active development, things may change quickly.