The motiviation was duplicating some of Looker's bar charts when the data itself was still a bunch of CSV files, MySQL queries, and a bunch of API calls glued together
%matplotlib inline
import pandas as pd
from collections import OrderedDict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
sns.set_style('whitegrid')
# start with some dummy data
df = pd.DataFrame({
'date':['2019-08-01','2019-09-01'],
'website_traffic':[530771,558652],
'new_accounts':[15720,15900],
'account_verify':[12670,13884],
'product_activated':[10812,12909],
'paid_conversion':[654,908],
'some_other_metric':[13270,13678]
})
df
df['date'] = pd.to_datetime(df['date'],format='%Y-%m-%d')
df.info()
# define only the KPIs we want to see (since there are cases where other metrics are present in the dataframe)
all_kpis = ['website_traffic','new_accounts','account_verify','product_activated','paid_conversion']
all_names = tuple([x.replace('_','\n').title() for x in all_kpis])
def get_it_in_order(df,mask,month):
"""
Biggest assumption: there is a 'date' column (not index!) in the dataframe that is *literally named* 'date'
Accepts entire dataframe
Supply a column mask
Supply a list of column names
"""
kpis = OrderedDict()
frame_2_dict = df.set_index('date').loc[month,mask].to_dict()
for i in mask:
kpis[i] = frame_2_dict[i]
return kpis
def add_arrow(x_adjust,y_adjust,ratio):
bbox_props = dict(boxstyle="rarrow,pad=0.5", facecolor="grey", edgecolor="grey", alpha=0.75, lw=1)
return plt.text(x_adjust,y_adjust,'{0:.1f}%'.format(ratio * 100),
verticalalignment='center',
horizontalalignment='left',
fontsize=16,
weight='black',
color='white',
bbox=bbox_props)
def plot_acquisition_funnel(df, month, names, metrics):
data = get_it_in_order(df, metrics, month)
count = range(len(data))
fig, ax = plt.subplots(figsize=(10, 6))
rects = ax.bar(count, data.values(), width=.5, align='center', color=sns.color_palette("Blues", len(data)))
plt.xticks(count, names)
plt.ylim(bottom=0, top=data[metrics[1]]*2)
plt.title('{0} Acquisition Funnel'.format(month), fontsize=16)
plt.tick_params(labelsize=16)
add_arrow(0.27, data[metrics[3]]/1.7, data[metrics[1]]/data[metrics[0]])
add_arrow(1.27, data[metrics[3]]/1.7, data[metrics[2]]/data[metrics[1]])
add_arrow(2.27, data[metrics[3]]/1.7, data[metrics[3]]/data[metrics[2]])
add_arrow(3.27, data[metrics[3]]/1.7, data[metrics[4]]/data[metrics[3]])
for rect, val in zip(rects,data.values()):
height = rect.get_height()
if height > 250:
label_color = 'grey'
else:
label_color = 'white'
ax.text(rect.get_x() + rect.get_width()/2,250,'{:,}'.format(val),ha='center', va='bottom',fontsize=12,weight='bold',color=label_color)
plt.grid(False)
plt.show()
plot_acquisition_funnel(df,'2019-08-01',all_names,all_kpis)