import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
df_ta_bar_reviews = pd.read_excel("ta_bar_reviews.xlsx")
df_ta_bar_reviews.date = pd.to_datetime(df_ta_bar_reviews.date)
print(df_ta_bar_reviews.dtypes)
df_ta_bar_reviews.head()
def first_of_month(x):
return x.replace(day=1)
df_ta_bar_reviews['first_of_month'] = df_ta_bar_reviews.date.apply(first_of_month)
for b in df_ta_bar_reviews.bar.unique():
print(b, \
df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['bar'].count(), \
np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3))
def month_weight(x, w=24):
return np.max((w - math.floor(x), 0))
def latest_weighted_rating(df, bar_field, bar_name, date_field, rating_field):
df_bar = df[df[bar_field] == bar_name]
max_date = df_bar[date_field].max()
m_weight = ((max_date - df_bar[date_field])/(np.timedelta64(1, 'M'))).apply(month_weight)
return np.round(np.sum(m_weight * df_bar[rating_field]) / float(np.sum(m_weight)), 3)
latest_ratings = []
for b in df_ta_bar_reviews.bar.unique():
latest_ratings.append({'bar': b, \
'w_avg': latest_weighted_rating(df_ta_bar_reviews, "bar", b, "first_of_month", "rating"), \
's_avg': np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3)
})
df_latest_w_ratings = pd.DataFrame(latest_ratings).sort_values('w_avg', ascending=False).reset_index(drop=True)
df_latest_w_ratings['per_dif'] = np.round( \
(( df_latest_w_ratings.w_avg - df_latest_w_ratings.s_avg ) / df_latest_w_ratings.s_avg), 3)
df_latest_w_ratings
df_top_hops = df_ta_bar_reviews[df_ta_bar_reviews.bar=="Top_Hops"]
df_top_hops.head(15)
df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \
df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \
axis=1)
df_top_hops_grouped.columns = ['rating_mean', 'rating_count']
df_top_hops_grouped.sort_index(ascending=False, inplace=True)
df_top_hops_grouped.head()
m_weight = pd.Series( ( df_top_hops_grouped.index.max() - df_top_hops_grouped.index ) / ( np.timedelta64(1, 'M') ) ).apply(month_weight)
m_rating = df_top_hops_grouped.rating_mean.values * df_top_hops_grouped.rating_count.values
m_denom = np.sum( m_weight * df_top_hops_grouped.rating_count.values )
w_rating = np.sum( m_weight * m_rating ) / m_denom
w_rating
moving_weighted_ratings = []
for row in range(df_top_hops_grouped.shape[0]):
m_weight = pd.Series((df_top_hops_grouped.iloc[row:].index.max() - df_top_hops_grouped.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight)
m_rating = df_top_hops_grouped.iloc[row:]['rating_mean'].values * df_top_hops_grouped.iloc[row:]['rating_count'].values
m_denom = np.sum( m_weight * df_top_hops_grouped.iloc[row:]['rating_count'].values )
w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 )
moving_weighted_ratings.append( w_rating )
moving_weighted_ratings[:10]
df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings
df_top_hops_grouped.head(10)
df_top_hops_grouped.moving_weighted_ratings.plot()
df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \
df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \
axis=1)
df_top_hops_grouped.columns = ['rating_mean', 'rating_count']
def group_ratings_by_date(df, date_field, rating_field):
df_grouped = pd.concat([df[[date_field, rating_field]].groupby([date_field]).mean(), \
df[[date_field, rating_field]].groupby([date_field]).count()], \
axis=1)
df_grouped.columns = ['rating_mean', 'rating_count']
return df_grouped
df_top_hops_grouped = group_ratings_by_date(df_top_hops, 'first_of_month', 'rating')
df_top_hops_grouped.head()
def expand_dates(df, index=True, date_field=False):
if index:
return pd.Series(pd.date_range(df.index.min(), df.index.max().replace(month = df.index.max().month + 1), freq="M")).apply(first_of_month)
else:
return pd.Series(pd.date_range(df[date_field].min(), df[date_field].max().replace(month = df[date_field].max().month + 1), freq="M")).apply(first_of_month)
expanded_date_series = expand_dates(df_top_hops_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['Top_Hops']}, index=expanded_date_series)
df_exp_dates.sort_index(ascending=False, inplace=True)
df_exp_dates.head()
df_top_hops_grouped = pd.concat([df_exp_dates, df_top_hops_grouped], axis=1).fillna(0)
df_top_hops_grouped.sort_index(ascending=False, inplace=True)
df_top_hops_grouped.head(10)
def moving_weighted_ratings(df):
ratings = []
for row in range(df.shape[0]):
m_weight = pd.Series((df.iloc[row:].index.max() - df.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight)
m_rating = df.iloc[row:]['rating_mean'].values * df.iloc[row:]['rating_count'].values
m_denom = np.sum( m_weight * df.iloc[row:]['rating_count'].values )
w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 )
ratings.append( w_rating )
return ratings
moving_weighted_ratings(df_top_hops_grouped)[:10]
df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings(df_top_hops_grouped)
df_top_hops_grouped.head(10)
df_top_hops_grouped.moving_weighted_ratings.plot()
all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month')
df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series)
df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head()
df_bar_grouped = group_ratings_by_date(df_ta_bar_reviews[df_ta_bar_reviews.bar=='230_Fifth'], 'first_of_month', 'rating')
expanded_date_series = expand_dates(df_bar_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['230_Fifth']}, index=expanded_date_series)
df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
df_bar_grouped.sort_index(ascending=False, inplace=True)
df_bar_grouped['230_Fifth'] = moving_weighted_ratings(df_bar_grouped)
df_bar_grouped.head(10)
def bar_moving_weighted_ratings(df, bar_field, bar_name, date_field, rating_field):
df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field)
expanded_date_series = expand_dates(df_bar_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series)
df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
df_bar_grouped.sort_index(ascending=False, inplace=True)
df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped)
return df_bar_grouped
bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', '230_Fifth', 'first_of_month', 'rating').head(10)
for b in df_ta_bar_reviews.bar.unique():
df_bar_moving_weighted_ratings = bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating')
print(b, '\t', df_bar_moving_weighted_ratings[b][0])
df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0)
df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head(20)
# df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg.xlsx')
def bar_moving_weighted_ratings_macro_date_range(df, bar_field, bar_name, date_field, rating_field):
df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field)
# change 1
df_macro_dates_grouped = group_ratings_by_date(df, date_field, rating_field)
# change 2
expanded_date_series = expand_dates(df_macro_dates_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series)
df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
df_bar_grouped.sort_index(ascending=False, inplace=True)
df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped)
return df_bar_grouped
bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', 'Earl_s_Beer_and_Cheese', 'first_of_month', 'rating').head(10)
all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month')
df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series)
df_all_dates.sort_index(ascending=False, inplace=True)
# df_all_dates.head()
for b in df_ta_bar_reviews.bar.unique():
df_bar_moving_weighted_ratings = bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating')
print(b, '\t', df_bar_moving_weighted_ratings[b][0])
df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0)
df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head()
# df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg_macro_date_range.xlsx')
df_all_dates.Broome_Street_Bar.plot()