#!/usr/bin/env python # coding: utf-8 # ## Building a dashboard to plan a marketing campaign leveraging CARTO Data Observatory # # Combining different data sources to identify some patterns or understand some behavior in a specific location is a very typical use case in Spatial Data Science. # # In this notebook, we will build a dashboard combining different data from CARTO's Data Observatory to help identify the locations with specific characteristics described below. # # **Note:** This use case leverages premium datasets from [CARTO's Data Observatory](https://carto.com/spatial-data-catalog/). # # # ### Use case description # # A pharmaceutical lab wants to launch a new marketing campaign to promote a new line of personal care products for senior people in the city of Philadelphia, PA. They know their target group is characterized by: # - People over 60 # - Medium-high to high income # - High expenditure in personal care products and services # # # Given these characteristics, they would like to know which pharmacies and drug stores in the city of Philadelphia they should focus their efforts on. # # In order to identify the target drug stores and pharmacies, we will follow the following steps: # - [Get all pharmacies in Philadelphia](#section1) # - [Calculate their cathment areas using isochrones](#section2) # - [Enrich the isochrones with demographic, POI's, and consumption data](#section3) # - [Build the dashboard to help identify the pharmacies where the campaign can be more successful given the characteristics of the population within their catchment area](#section4) # # # ### 0. Setup # Import the packages we'll use. # In[1]: import geopandas as gpd import pandas as pd from cartoframes.auth import set_default_credentials from cartoframes.data.services import Isolines from cartoframes.data.observatory import * from cartoframes.viz import * from shapely.geometry import box pd.set_option('display.max_columns', None) # In order to be able to use the Data Observatory via CARTOframes, you need to set your CARTO account credentials first. # # Please, visit the [Authentication guide](https://carto.com/developers/cartoframes/guides/Authentication/) for further detail. # In[2]: from cartoframes.auth import set_default_credentials set_default_credentials('creds.json') # **Note about credentials** # # For security reasons, we recommend storing your credentials in an external file to prevent publishing them by accident when sharing your notebooks. You can get more information in the section _Setting your credentials_ of the [Authentication guide](https://carto.com/developers/cartoframes/guides/Authentication/). # # ### 1. Download all pharmacies in Philadelphia from the Data Observatory # Below is the bounding box of the area of study. # In[3]: dem_bbox = box(-75.229353,39.885501,-75.061124,39.997898) # We can get the pharmacies from [Pitney Bowes' Consumer Points of Interest](https://carto.com/spatial-data-catalog/browser/dataset/pb_consumer_po_62cddc04/) dataset. This is a premium dataset, so we first need to check that we are subscribed to it. # # Take a look at this template for more details on how to access and download a premium dataset. # In[4]: Catalog().subscriptions().datasets.to_dataframe() # #### Download and explore sample # # Pitney Bowes POI's are hierarchically classified (levels: trade division, group, class, sub class). # # Since we might not know which level can help us identify all pharmacies, we can start by downloading a sample for a smaller area to explore the dataset. For calculating the bounding box we use [bboxfinder](http://bboxfinder.com). # # We start by selecting our dataset and taking a quick look at its first 10 rows. # In[5]: dataset = Dataset.get('pb_consumer_po_62cddc04') # In[6]: dataset.head() # Let's now download a small sample to help us identify which of the four hierarchy variables gives us the pharmacies. # In[7]: sql_query = "SELECT * except(do_label) FROM $dataset$ WHERE ST_IntersectsBox(geom, -75.161723,39.962019,-75.149535,39.968071)" sample = dataset.to_dataframe(sql_query=sql_query) # In[8]: sample.head() # In[9]: sample['TRADE_DIVISION'].unique() # In[10]: sample.loc[sample['TRADE_DIVISION'] == 'DIVISION G. - RETAIL TRADE', 'GROUP'].unique() # In[11]: sample.loc[sample['TRADE_DIVISION'] == 'DIVISION G. - RETAIL TRADE', 'CLASS'].unique() # The class `DRUG STORES AND PROPRIETARY STORES` is the one we're looking for. # In[12]: sample.loc[sample['CLASS'] == 'DRUG STORES AND PROPRIETARY STORES', 'SUB_CLASS'].unique() # #### Download all pharmacies in the area of study # In[13]: sql_query = """SELECT * except(do_label) FROM $dataset$ WHERE CLASS = 'DRUG STORES AND PROPRIETARY STORES' AND ST_IntersectsBox(geom, -75.229353,39.885501,-75.061124,39.997898)""" ph_pharmacies = dataset.to_dataframe(sql_query=sql_query) # In[14]: ph_pharmacies.head() # The dataset contains different versions of the POI's tagged by the _do_date_ column. We are only inetrested in the latest version of each POI. # In[15]: ph_pharmacies = ph_pharmacies.sort_values(by='do_date', ascending=False).groupby('PB_ID').first().reset_index() # In[16]: ph_pharmacies.shape # #### Visualize the dataset # In[17]: Layer(ph_pharmacies, geom_col='geom', style=basic_style(opacity=0.75), popup_hover=popup_element('NAME')) # # ### 2. Calculate catchment areas # In order to know the characteristics of the potential customers of every pharmacy, we assume the majority of their clients live closeby. Therefore we will calculate **5-minute-by-car isochrones** and take them as their cathment areas. # # _Note_ catchment areas usually depend on whether it is a store in the downtown area or in the suburbs, or if it is reachable on foot or only by car. For this example, we will not make such distiction between pharmacies, but we strongly encourage you to do so on your analyses. As an example, [here](https://carto.com/blog/calculating-catchment-human-mobility-data/) we describe how to calculate catchment areas using human mobility data. # In[18]: iso_service = Isolines() isochrones_gdf, _ = iso_service.isochrones(ph_pharmacies, [300], mode='car', geom_col='geom') # In[19]: ph_pharmacies['iso_5car'] = isochrones_gdf.sort_values(by='source_id')['the_geom'].values # #### Visualize isochrones # # We'll only visualize the ten first isochrones to get a clean visualization. # In[20]: Map([Layer(ph_pharmacies.iloc[:10], geom_col='iso_5car', style=basic_style(opacity=0.1), legends=basic_legend('Catchment Areas')), Layer(ph_pharmacies.iloc[:10], geom_col='geom', popup_hover=popup_element('NAME'), legends=basic_legend('Pharmacies'))]) # # ### 3. Enrichment: Chacacterize catchment areas # # We'll now enrich the pharmacies catchment areas with demographics, POI's, and consumer spending data. # # For the enrichment, we will use the CARTOframes Enrichment class. This class contains the functionality to enrich polygons and points. # # Visit [CARTOframes Guides](https://carto.com/developers/cartoframes/guides/) for further detail. # In[21]: enrichment = Enrichment() # #### Demographics # # We will use AGS premium data. In particular, we will work with the dataset `ags_sociodemogr_f510a947` which contains [yearly demographics data from 2019](https://carto.com/spatial-data-catalog/browser/dataset/ags_sociodemogr_f510a947/). # ##### Variable selection # # Here we will enrich the pharmacies isochrones with: # - Population aged 60+ # - Household income # - Household income for population ages 65+ # In[22]: Catalog().country('usa').category('demographics').provider('ags').datasets.to_dataframe().head() # In[23]: dataset = Dataset.get('ags_sociodemogr_f510a947') # In[24]: dataset.head() # We explore the variables to identify the ones we're interested in. # # Variables in a dataset are uniquely identified by their slug. # In[25]: dataset.variables.to_dataframe().head() # We'll select: # - Population and population by age variables to identify number of people aged 60+ as a percentage of total population # - Average household income # - Average household income for porpulation aged 65+ # In[26]: vars_enrichment = ['POPCY_5e23b8f4', 'AGECY6064_d54c2315', 'AGECY6569_ad369d43', 'AGECY7074_74eb7531', 'AGECY7579_c91cb67', 'AGECY8084_ab1079a8', 'AGECYGT85_a0959a08', 'INCCYMEDHH_b80a7a7b', 'HINCYMED65_37a430a4', 'HINCYMED75_2ebf01e5'] # ##### Isochrone enrichment # In[29]: ph_pharmacies_enriched = enrichment.enrich_polygons( ph_pharmacies, variables=vars_enrichment, geom_col='iso_5car' ) # In[30]: ph_pharmacies_enriched.head() # In[31]: ph_pharmacies = ph_pharmacies_enriched.copy() # In[32]: ph_pharmacies['pop_60plus'] = ph_pharmacies[['AGECY8084', 'AGECYGT85', 'AGECY6569', 'AGECY7579', 'AGECY7074', 'AGECY6064']].sum(1) ph_pharmacies.drop(columns=['AGECY8084', 'AGECYGT85', 'AGECY6569', 'AGECY7579', 'AGECY7074', 'AGECY6064'], inplace=True) # #### Points of Interest # # We will use [Pitney Bowes' Consumer Points of Interest](https://carto.com/spatial-data-catalog/browser/dataset/pb_consumer_po_62cddc04/) premium dataset. # ##### Variable selection # # We are interested in knowing how many of the following POIs can be found in each isochrone: # - Beauty shops and beauty salons # - Gyms and other sports centers # # These POI's will be considered as an indicator of personal care awareness in a specific area. # The hierarchy classification variable `SUB_CLASS` variable allows us to identify beaty shops and salons (`BEAUTY SHOPS/BEAUTY SALON`) and gyms (`MEMBERSHIP SPORTS AND RECREATION CLUBS/CLUB AND ASSOCIATION - UNSPECIFIED`). # In[33]: sample.loc[sample['TRADE_DIVISION'] == 'DIVISION I. - SERVICES', 'SUB_CLASS'].unique() # ##### Isochrone enrichment # In order to count only Beauty Shops/Salons and Gyms, we will apply a filter to the enrichment. All filters are applied with an AND-like relationship. This means we need to run two independent enrichment calls, one for the beauty shops/salons and another one for the gyms. # In[34]: ph_pharmacies_enriched = enrichment.enrich_polygons( ph_pharmacies, variables=['SUB_CLASS_10243439'], aggregation='COUNT', geom_col='iso_5car', filters={Variable.get('SUB_CLASS_10243439').id : "= 'BEAUTY SHOPS/BEAUTY SALON'"} ) # In[35]: ph_pharmacies = ph_pharmacies_enriched.rename(columns={'SUB_CLASS_y':'n_beauty_pois'}) # In[36]: ph_pharmacies_enriched = enrichment.enrich_polygons( ph_pharmacies, variables=['SUB_CLASS_10243439'], aggregation='COUNT', geom_col='iso_5car', filters={Variable.get('SUB_CLASS_10243439').id : "= 'MEMBERSHIP SPORTS AND RECREATION CLUBS/CLUB AND ASSOCIATION - UNSPECIFIED'"} ) # In[37]: ph_pharmacies = ph_pharmacies_enriched.rename(columns={'SUB_CLASS':'n_gym_pois'}) # In[38]: ph_pharmacies['n_pois_personal_care'] = ph_pharmacies['n_beauty_pois'] + ph_pharmacies['n_gym_pois'] ph_pharmacies.drop(columns=['n_beauty_pois', 'n_gym_pois'], inplace=True) # #### Consumer spending # # For consumer spending, we will use AGS premium data. In particular, we will work with the dataset `ags_consumer_sp_dbabddfb` which contains the [latest version of yearly consumer data](https://carto.com/spatial-data-catalog/browser/dataset/ags_consumer_sp_dbabddfb/). # ##### Variable selection # # We are interested in spending in: # - Personal care services # - Personal care products # - Health care services # In[41]: dataset = Dataset.get('ags_consumer_sp_dbabddfb') # In[42]: dataset.variables.to_dataframe().head() # The variables we're interested in are: # - `XCYHC2` Health care services expenditure # - `XCYPC3` Personal care services expenditure # - `XCYPC4` Personal care products expenditure # In[43]: Variable.get('XCYHC2_18141567').to_dict() # In[44]: ph_pharmacies_enriched = enrichment.enrich_polygons( ph_pharmacies, variables=['XCYPC3_7d26d739', 'XCYPC4_e342429a', 'XCYHC2_18141567'], geom_col='iso_5car' ) # We rename the new columns to give them a more descriptive name. # In[45]: ph_pharmacies = ph_pharmacies_enriched.rename(columns={'XCYHC2':'health_care_services_exp', 'XCYPC3':'personal_care_services_exp', 'XCYPC4':'personal_care_products_exp'}) # In[46]: ph_pharmacies.head(2) # # ### 4. Dashboard # # Finally, with all the data gathered, we will build the dashboard and publish it so we can share it with our client/manager/colleague for them to explore it. # # This dashboard allows you to select a range of desired expenditure in care products, people aged 60+, household income, and so forth. Selecting the desired ranges will filter out pharmacies, so that in the end you can identify the target pharmacies for your marketing campaign. # In[47]: cmap = Map(Layer(ph_pharmacies, geom_col='geom', style=color_category_style('SIC8_DESCRIPTION', size=4, opacity=0.85, palette='safe', stroke_width=0.15), widgets=[formula_widget( 'PB_ID', operation='COUNT', title='Total number of pharmacies', description='Keep track of the total amount of pharmacies that meet the ranges selected on the widgets below'), histogram_widget( 'pop_60plus', title='Population 60+', description='Select a range of values to filter', buckets=15 ), histogram_widget( 'HINCYMED65', title='Household income 65-74', buckets=15 ), histogram_widget( 'HINCYMED75', title='Household income 75+', buckets=15 ), histogram_widget( 'n_pois_personal_care', title='Number of personal care POIs', buckets=15 ), histogram_widget( 'personal_care_products_exp', title='Expenditure in personal care products ($)', buckets=15 )], legends=color_category_legend( title='Pharmacies', description='Type of store'), popup_hover=[popup_element('NAME', title='Name')] ), viewport={'zoom': 11} ) # In[48]: cmap # #### Publish dashboard # In[49]: cmap.publish('ph_pharmacies_dashboard', password='MY_PASS', if_exists='replace')