#!/usr/bin/env python
# coding: utf-8

# # Basic cartoframes usage
#
# `cartoframes` lets you use CARTO in a Python environment so that you can do
# all of your analysis and mapping in, for example, a Jupyter notebook.
# `cartoframes` gives you access to CARTO's functionality for data analysis,
# storage, location services like routing and geocoding, and visualization.
#
# This notebook is best viewed on `nbviewer`. It is recommended to download
# the notebook and run it on your own computer so you can more easily explore
# the functionality of `cartoframes`.
#
# To get started, let's load the required packages and set credentials.

# In[1]:

# The inline-plotting magic only exists under IPython/Jupyter; skip it when
# this exported notebook is executed as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import matplotlib.pyplot as plt
import pandas as pd

import cartoframes
from cartoframes import BaseMap, Credentials, Layer, styling
from cartoframes.examples import read_brooklyn_poverty, read_taxi

USERNAME = 'eschbacher'  # <-- replace with your username
APIKEY = 'abcdefg'       # <-- your CARTO API key (placeholder; never commit a real key)

# Names of the CARTO tables this notebook creates (and overwrites) in your
# account. NOTE: despite its name, POVERTY_TABLE holds the Brooklyn poverty
# example data, not taxi data.
POVERTY_TABLE = 'my_taxi_dataset_deleteme'
TAXI_TABLE = 'taxi_50k'

creds = Credentials(username=USERNAME, key=APIKEY)
cc = cartoframes.CartoContext(creds=creds)


# ## `cc.read`
#
# `CartoContext` has several methods for interacting with
# [CARTO](https://carto.com) in a Python environment. `CartoContext.read`
# allows you to pull a dataset stored on CARTO into a
# [pandas](http://pandas.pydata.org/) DataFrame. In the cells below, we first
# upload the Brooklyn poverty example dataset that ships with
# `cartoframes.examples` to your CARTO account, and then use `cc.read` to get
# that table back as a DataFrame.

# In[2]:

cc.write(read_brooklyn_poverty(), POVERTY_TABLE, overwrite=True)


# In[3]:

# Get a CARTO table as a pandas DataFrame
df = cc.read(POVERTY_TABLE)
df.head()


# Notice that:
#
# * the index of the DataFrame is the same as the index of the CARTO table
#   (`cartodb_id`)
# * `the_geom` column stores the geometry. This can be decoded if we set the
#   `decode_geom=True` flag in `cc.read`, which requires the library `shapely`.
# * We have several numeric columns
# * SQL `null` values are represented as `np.nan`
#
# Other things to notice:

# In[4]:

df.dtypes


# The `dtype` of each column is a mapping of the column type on CARTO. For
# example, `numeric` will map to `float64`, `text` will map to `object`
# (pandas string representation), `timestamp` will map to `datetime64[ns]`,
# etc. The reverse happens if a DataFrame is sent to CARTO.

# ## `cc.map`
#
# Now that we can inspect the data, we can map it to see how the values change
# over the geography. We can use the `cc.map` method for this purpose.
#
# `cc.map` takes a `layers` argument which specifies the data layers that are
# to be visualized. They can be imported from `cartoframes` as below.
#
# There are different types of layers:
#
# * `Layer` for visualizing CARTO tables
# * `QueryLayer` for visualizing arbitrary queries from tables in user's CARTO
#   account
# * `BaseMap` for specifying the base map to be used
#
# Each of the layers has different styling options. `Layer` and `QueryLayer`
# take the same styling arguments, and `BaseMap` can be specified to be
# light/dark and has options for label placement.
#
# Maps can be interactive or not. Control this by setting the `interactive`
# argument to `True` or `False`. If the map is static (not interactive), it
# will be embedded in the notebook as either a `matplotlib` axis or
# `IPython.Image`. Either way, the image will be transported with the
# notebook. Interactive maps will be embedded as zoomable and pannable maps.

# In[5]:

poverty_layer = Layer(
    POVERTY_TABLE,
    color={'column': 'poverty_per_pop', 'scheme': styling.sunset(7)},
)
cc.map(layers=poverty_layer, interactive=False)


# ### Multiple variables together

# In[6]:

table = POVERTY_TABLE
cols = [
    'pop_determined_poverty_status_2011_2015',
    'poverty_per_pop',
    'walked_to_work_2011_2015_per_pop',
    'total_pop_2011_2015',
]

fig, axs = plt.subplots(2, 2, figsize=(12, 12))

# `axs.flat` walks the 2x2 grid of axes row by row, so each column gets the
# same panel it would get from `axs[idx // 2][idx % 2]`.
for ax, col in zip(axs.flat, cols):
    cc.map(
        layers=[
            BaseMap('dark'),
            Layer(
                table,
                color={'column': col,
                       'scheme': styling.sunset(7, 'quantiles')},
            ),
        ],
        ax=ax,
        zoom=11,
        lng=-73.9476,
        lat=40.6437,
        interactive=False,
        size=(432, 432),
    )
    ax.set_title(col)

fig.tight_layout()
plt.show()


# ## NYC Taxi Dataset
#
# Let's explore a typical `cartoframes` workflow using data on NYC taxis.
#
# To get the data into CARTO, we can:
# 1. Use `cartoframes.examples.read_taxi` to grab the data from the
#    cartoframes example account as a `pandas` DataFrame
# 2. Send it to your CARTO account using `cc.write`, specifying the
#    `lng`/`lat` columns you want to use for visualization
# 3. Set `overwrite=True` to replace an existing dataset if it exists
# 4. Refresh our `df` with the CARTO-fied version using `cc.read`

# In[7]:

# read in the NYC taxi data from the cartoframes example datasets
df = read_taxi()

# show first five rows to see what we've got
df.head()


# In[8]:

# send it to carto so we can map it
# specify the columns we want to have as a point (pickup location)
cc.write(
    df,
    TAXI_TABLE,
    lnglat=('pickup_longitude', 'pickup_latitude'),
    overwrite=True,
)

# read the fresh carto-fied version
df = cc.read(TAXI_TABLE)


# Take a look at the data on a map.

# In[11]:

cc.map(layers=Layer(TAXI_TABLE), interactive=False)


# Oops, there are some zero-valued long/lats in there, so the results are
# going to [null island](https://en.wikipedia.org/wiki/Null_Island). Let's
# remove them.

# In[12]:

# select only the rows which are not at (0,0): a row is kept when at least
# one of its pickup coordinates is non-zero
not_null_island = (df['pickup_longitude'] != 0) | (df['pickup_latitude'] != 0)
df = df[not_null_island]

# send back up to CARTO
cc.write(
    df,
    TAXI_TABLE,
    overwrite=True,
    lnglat=('pickup_longitude', 'pickup_latitude'),
)


# Instead of using pandas, we could remove those rows on the database by using
# SQL.
#
# ```python
# cc.query('''
#     DELETE FROM taxi_50k
#     WHERE pickup_longitude = 0 and pickup_latitude = 0
# ''')
# ```

# In[16]:

# Let's take a look at what's going on, styled by the fare amount
cc.map(
    layers=Layer(
        TAXI_TABLE,
        size=4,
        color={'column': 'fare_amount', 'scheme': styling.sunset(7)},
    ),
    interactive=True,
)


# We can use the `zoom=..., lng=..., lat=...` information in the embedded
# interactive map to help us get static snapshots of the regions we're
# interested in. For example, JFK airport is around
# `zoom=12, lng=-73.7880, lat=40.6629`. We can paste that information as
# arguments in `cc.map` to generate a static snapshot of the data there.

# In[17]:

# Let's take a look at what's going on at JFK airport, styled by the fare
# amount, and STATIC
cc.map(
    layers=Layer(
        TAXI_TABLE,
        size=6,
        color={'column': 'fare_amount', 'scheme': styling.sunset(7)},
    ),
    zoom=12,
    lng=-73.7880,
    lat=40.6629,
    interactive=False,
)