#!/usr/bin/env python # coding: utf-8 # [![img](..\ep8_2018-57x57.png)](http://endlesspint.com/) # # # Reseen By Us # # **post @** [endlesspint.com](http://endlesspint.com/2019-08-02-re-seen-by-us-bar-reviews/) # In[1]: import numpy as np import pandas as pd import math import matplotlib.pyplot as plt plt.style.use('ggplot') get_ipython().run_line_magic('matplotlib', 'inline') # In[2]: df_ta_bar_reviews = pd.read_excel("ta_bar_reviews.xlsx") df_ta_bar_reviews.date = pd.to_datetime(df_ta_bar_reviews.date) print(df_ta_bar_reviews.dtypes) df_ta_bar_reviews.head() # In[3]: def first_of_month(x): return x.replace(day=1) df_ta_bar_reviews['first_of_month'] = df_ta_bar_reviews.date.apply(first_of_month) # In[4]: for b in df_ta_bar_reviews.bar.unique(): print(b, \ df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['bar'].count(), \ np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3)) # In[5]: def month_weight(x, w=24): return np.max((w - math.floor(x), 0)) def latest_weighted_rating(df, bar_field, bar_name, date_field, rating_field): df_bar = df[df[bar_field] == bar_name] max_date = df_bar[date_field].max() m_weight = ((max_date - df_bar[date_field])/(np.timedelta64(1, 'M'))).apply(month_weight) return np.round(np.sum(m_weight * df_bar[rating_field]) / float(np.sum(m_weight)), 3) latest_ratings = [] for b in df_ta_bar_reviews.bar.unique(): latest_ratings.append({'bar': b, \ 'w_avg': latest_weighted_rating(df_ta_bar_reviews, "bar", b, "first_of_month", "rating"), \ 's_avg': np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3) }) df_latest_w_ratings = pd.DataFrame(latest_ratings).sort_values('w_avg', ascending=False).reset_index(drop=True) df_latest_w_ratings['per_dif'] = np.round( \ (( df_latest_w_ratings.w_avg - df_latest_w_ratings.s_avg ) / df_latest_w_ratings.s_avg), 3) df_latest_w_ratings # ## confirm results from grouped rating approach (sample) # In[6]: df_top_hops = df_ta_bar_reviews[df_ta_bar_reviews.bar=="Top_Hops"] df_top_hops.head(15) # In[7]: df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \ df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \ axis=1) df_top_hops_grouped.columns = ['rating_mean', 'rating_count'] df_top_hops_grouped.sort_index(ascending=False, inplace=True) df_top_hops_grouped.head() # ## confirmed # In[8]: m_weight = pd.Series( ( df_top_hops_grouped.index.max() - df_top_hops_grouped.index ) / ( np.timedelta64(1, 'M') ) ).apply(month_weight) m_rating = df_top_hops_grouped.rating_mean.values * df_top_hops_grouped.rating_count.values m_denom = np.sum( m_weight * df_top_hops_grouped.rating_count.values ) w_rating = np.sum( m_weight * m_rating ) / m_denom w_rating # ## cycle through dates/index for moving weighted rating # ## (ignore missing months for time being) # In[9]: moving_weighted_ratings = [] for row in range(df_top_hops_grouped.shape[0]): m_weight = pd.Series((df_top_hops_grouped.iloc[row:].index.max() - df_top_hops_grouped.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight) m_rating = df_top_hops_grouped.iloc[row:]['rating_mean'].values * df_top_hops_grouped.iloc[row:]['rating_count'].values m_denom = np.sum( m_weight * df_top_hops_grouped.iloc[row:]['rating_count'].values ) w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 ) moving_weighted_ratings.append( w_rating ) moving_weighted_ratings[:10] # In[10]: df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings df_top_hops_grouped.head(10) # In[11]: df_top_hops_grouped.moving_weighted_ratings.plot() # ## fill in missing months # In[12]: df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \ df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \ axis=1) df_top_hops_grouped.columns = ['rating_mean', 'rating_count'] # In[13]: def group_ratings_by_date(df, date_field, rating_field): df_grouped = pd.concat([df[[date_field, rating_field]].groupby([date_field]).mean(), \ df[[date_field, rating_field]].groupby([date_field]).count()], \ axis=1) df_grouped.columns = ['rating_mean', 'rating_count'] return df_grouped df_top_hops_grouped = group_ratings_by_date(df_top_hops, 'first_of_month', 'rating') df_top_hops_grouped.head() # In[14]: def expand_dates(df, index=True, date_field=False): if index: return pd.Series(pd.date_range(df.index.min(), df.index.max().replace(month = df.index.max().month + 1), freq="M")).apply(first_of_month) else: return pd.Series(pd.date_range(df[date_field].min(), df[date_field].max().replace(month = df[date_field].max().month + 1), freq="M")).apply(first_of_month) expanded_date_series = expand_dates(df_top_hops_grouped) df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['Top_Hops']}, index=expanded_date_series) df_exp_dates.sort_index(ascending=False, inplace=True) df_exp_dates.head() # In[15]: df_top_hops_grouped = pd.concat([df_exp_dates, df_top_hops_grouped], axis=1).fillna(0) df_top_hops_grouped.sort_index(ascending=False, inplace=True) df_top_hops_grouped.head(10) # In[16]: def moving_weighted_ratings(df): ratings = [] for row in range(df.shape[0]): m_weight = pd.Series((df.iloc[row:].index.max() - df.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight) m_rating = df.iloc[row:]['rating_mean'].values * df.iloc[row:]['rating_count'].values m_denom = np.sum( m_weight * df.iloc[row:]['rating_count'].values ) w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 ) ratings.append( w_rating ) return ratings moving_weighted_ratings(df_top_hops_grouped)[:10] # In[17]: df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings(df_top_hops_grouped) df_top_hops_grouped.head(10) # In[18]: df_top_hops_grouped.moving_weighted_ratings.plot() # ## let's run for all, merge and... # In[19]: all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month') df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series) df_all_dates.sort_index(ascending=False, inplace=True) df_all_dates.head() # In[20]: df_bar_grouped = group_ratings_by_date(df_ta_bar_reviews[df_ta_bar_reviews.bar=='230_Fifth'], 'first_of_month', 'rating') expanded_date_series = expand_dates(df_bar_grouped) df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['230_Fifth']}, index=expanded_date_series) df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0) df_bar_grouped.sort_index(ascending=False, inplace=True) df_bar_grouped['230_Fifth'] = moving_weighted_ratings(df_bar_grouped) df_bar_grouped.head(10) # In[21]: def bar_moving_weighted_ratings(df, bar_field, bar_name, date_field, rating_field): df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field) expanded_date_series = expand_dates(df_bar_grouped) df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series) df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0) df_bar_grouped.sort_index(ascending=False, inplace=True) df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped) return df_bar_grouped bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', '230_Fifth', 'first_of_month', 'rating').head(10) # In[22]: for b in df_ta_bar_reviews.bar.unique(): df_bar_moving_weighted_ratings = bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating') print(b, '\t', df_bar_moving_weighted_ratings[b][0]) df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0) # In[23]: df_all_dates.sort_index(ascending=False, inplace=True) df_all_dates.head(20) # In[24]: # df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg.xlsx') # ## need to concat each bar on entire date range first and then work backwards in ratings, this will remove sudden zeroes in June # In[29]: def bar_moving_weighted_ratings_macro_date_range(df, bar_field, bar_name, date_field, rating_field): df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field) # change 1 df_macro_dates_grouped = group_ratings_by_date(df, date_field, rating_field) # change 2 expanded_date_series = expand_dates(df_macro_dates_grouped) df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series) df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0) df_bar_grouped.sort_index(ascending=False, inplace=True) df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped) return df_bar_grouped bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', 'Earl_s_Beer_and_Cheese', 'first_of_month', 'rating').head(10) # In[31]: all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month') df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series) df_all_dates.sort_index(ascending=False, inplace=True) # df_all_dates.head() for b in df_ta_bar_reviews.bar.unique(): df_bar_moving_weighted_ratings = bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating') print(b, '\t', df_bar_moving_weighted_ratings[b][0]) df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0) # In[32]: df_all_dates.sort_index(ascending=False, inplace=True) df_all_dates.head() # In[33]: # df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg_macro_date_range.xlsx') # In[35]: df_all_dates.Broome_Street_Bar.plot() # ## some bars go for several months w/o reviews, perhaps suspend rating after n-months # In[ ]: # In[ ]: