#!/usr/bin/env python
# coding: utf-8

# [![img](..\ep8_2018-57x57.png)](http://endlesspint.com/)
# 
# # Reseen By Us
# 
# **post @** [endlesspint.com](http://endlesspint.com/2019-08-02-re-seen-by-us-bar-reviews/)

# In[1]:


import numpy as np
import pandas as pd

import math

import matplotlib.pyplot as plt
plt.style.use('ggplot')
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


df_ta_bar_reviews = pd.read_excel("ta_bar_reviews.xlsx")

df_ta_bar_reviews.date = pd.to_datetime(df_ta_bar_reviews.date)
print(df_ta_bar_reviews.dtypes)

df_ta_bar_reviews.head()


# In[3]:


def first_of_month(x):
    return x.replace(day=1)

df_ta_bar_reviews['first_of_month'] = df_ta_bar_reviews.date.apply(first_of_month)


# In[4]:


for b in df_ta_bar_reviews.bar.unique():
    print(b, \
          df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['bar'].count(), \
          np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3))


# In[5]:


def month_weight(x, w=24):
    return np.max((w - math.floor(x), 0))


def latest_weighted_rating(df, bar_field, bar_name, date_field, rating_field):
    df_bar = df[df[bar_field] == bar_name]
    max_date = df_bar[date_field].max()
    m_weight = ((max_date - df_bar[date_field])/(np.timedelta64(1, 'M'))).apply(month_weight)
    return np.round(np.sum(m_weight * df_bar[rating_field]) / float(np.sum(m_weight)), 3)

latest_ratings = []


for b in df_ta_bar_reviews.bar.unique():
    latest_ratings.append({'bar': b, \
                           'w_avg': latest_weighted_rating(df_ta_bar_reviews, "bar", b, "first_of_month", "rating"), \
                           's_avg': np.round(df_ta_bar_reviews[df_ta_bar_reviews.bar==b]['rating'].mean(), 3)
                          })

df_latest_w_ratings = pd.DataFrame(latest_ratings).sort_values('w_avg', ascending=False).reset_index(drop=True)
df_latest_w_ratings['per_dif'] = np.round( \
                                (( df_latest_w_ratings.w_avg - df_latest_w_ratings.s_avg ) / df_latest_w_ratings.s_avg), 3)

df_latest_w_ratings


# ## confirm results from grouped rating approach (sample)

# In[6]:


df_top_hops = df_ta_bar_reviews[df_ta_bar_reviews.bar=="Top_Hops"]
df_top_hops.head(15)


# In[7]:


df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \
                                 df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \
                                axis=1)

df_top_hops_grouped.columns = ['rating_mean', 'rating_count']
df_top_hops_grouped.sort_index(ascending=False, inplace=True)
df_top_hops_grouped.head()


# ## confirmed

# In[8]:


m_weight = pd.Series( ( df_top_hops_grouped.index.max() - df_top_hops_grouped.index ) / ( np.timedelta64(1, 'M') ) ).apply(month_weight)
m_rating = df_top_hops_grouped.rating_mean.values * df_top_hops_grouped.rating_count.values
m_denom  = np.sum( m_weight * df_top_hops_grouped.rating_count.values )

w_rating = np.sum( m_weight * m_rating ) / m_denom
w_rating


# ## cycle through dates/index for moving weighted rating 
# ## (ignore missing months for time being)

# In[9]:


moving_weighted_ratings = []

for row in range(df_top_hops_grouped.shape[0]):
    
    m_weight = pd.Series((df_top_hops_grouped.iloc[row:].index.max() - df_top_hops_grouped.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight) 
    m_rating = df_top_hops_grouped.iloc[row:]['rating_mean'].values * df_top_hops_grouped.iloc[row:]['rating_count'].values
    m_denom  = np.sum( m_weight * df_top_hops_grouped.iloc[row:]['rating_count'].values )
    
    w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 )
    moving_weighted_ratings.append( w_rating )
    
moving_weighted_ratings[:10]


# In[10]:


df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings
df_top_hops_grouped.head(10)


# In[11]:


df_top_hops_grouped.moving_weighted_ratings.plot()


# ## fill in missing months

# In[12]:


df_top_hops_grouped = pd.concat([df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).mean(), \
                                 df_top_hops[['first_of_month', 'rating']].groupby(['first_of_month']).count()], \
                                axis=1)

df_top_hops_grouped.columns = ['rating_mean', 'rating_count']


# In[13]:


def group_ratings_by_date(df, date_field, rating_field):
    
    df_grouped = pd.concat([df[[date_field, rating_field]].groupby([date_field]).mean(), \
                            df[[date_field, rating_field]].groupby([date_field]).count()], \
                           axis=1)
    
    df_grouped.columns = ['rating_mean', 'rating_count']
    
    return df_grouped


df_top_hops_grouped = group_ratings_by_date(df_top_hops, 'first_of_month', 'rating')
df_top_hops_grouped.head()


# In[14]:


def expand_dates(df, index=True, date_field=False):
    if index:
        return pd.Series(pd.date_range(df.index.min(), df.index.max().replace(month = df.index.max().month + 1), freq="M")).apply(first_of_month)
    else:
        return pd.Series(pd.date_range(df[date_field].min(), df[date_field].max().replace(month = df[date_field].max().month + 1), freq="M")).apply(first_of_month)
    

expanded_date_series = expand_dates(df_top_hops_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['Top_Hops']}, index=expanded_date_series)
df_exp_dates.sort_index(ascending=False, inplace=True)
df_exp_dates.head()


# In[15]:


df_top_hops_grouped = pd.concat([df_exp_dates, df_top_hops_grouped], axis=1).fillna(0)
df_top_hops_grouped.sort_index(ascending=False, inplace=True)
df_top_hops_grouped.head(10)


# In[16]:


def moving_weighted_ratings(df):

    ratings = []

    for row in range(df.shape[0]):

        m_weight = pd.Series((df.iloc[row:].index.max() - df.iloc[row:].index) / ( np.timedelta64(1, 'M') )).apply(month_weight) 
        m_rating = df.iloc[row:]['rating_mean'].values * df.iloc[row:]['rating_count'].values
        m_denom  = np.sum( m_weight * df.iloc[row:]['rating_count'].values )

        w_rating = np.round( (np.sum( m_weight * m_rating ) / m_denom), 3 )
        ratings.append( w_rating )

    return ratings

moving_weighted_ratings(df_top_hops_grouped)[:10]


# In[17]:


df_top_hops_grouped['moving_weighted_ratings'] = moving_weighted_ratings(df_top_hops_grouped)
df_top_hops_grouped.head(10)


# In[18]:


df_top_hops_grouped.moving_weighted_ratings.plot()


# ## let's run for all, merge and... 

# In[19]:


all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month')

df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series)
df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head()


# In[20]:


df_bar_grouped = group_ratings_by_date(df_ta_bar_reviews[df_ta_bar_reviews.bar=='230_Fifth'], 'first_of_month', 'rating')

expanded_date_series = expand_dates(df_bar_grouped)
df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * ['230_Fifth']}, index=expanded_date_series)

df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
df_bar_grouped.sort_index(ascending=False, inplace=True)

df_bar_grouped['230_Fifth'] = moving_weighted_ratings(df_bar_grouped)
df_bar_grouped.head(10)


# In[21]:


def bar_moving_weighted_ratings(df, bar_field, bar_name, date_field, rating_field):

    df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field)

    expanded_date_series = expand_dates(df_bar_grouped)
    df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series)

    df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
    df_bar_grouped.sort_index(ascending=False, inplace=True)

    df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped)
    return df_bar_grouped
    
bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', '230_Fifth', 'first_of_month', 'rating').head(10)


# In[22]:


for b in df_ta_bar_reviews.bar.unique():
    df_bar_moving_weighted_ratings = bar_moving_weighted_ratings(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating')
    print(b, '\t', df_bar_moving_weighted_ratings[b][0])
    df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0)


# In[23]:


df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head(20)


# In[24]:


# df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg.xlsx')


# ## need to concat each bar on entire date range first and then work backwards in ratings, this will remove sudden zeroes in June

# In[29]:


def bar_moving_weighted_ratings_macro_date_range(df, bar_field, bar_name, date_field, rating_field):

    df_bar_grouped = group_ratings_by_date(df[df[bar_field] == bar_name], date_field, rating_field)
    
    # change 1
    df_macro_dates_grouped = group_ratings_by_date(df, date_field, rating_field)

    # change 2
    expanded_date_series = expand_dates(df_macro_dates_grouped)
    df_exp_dates = pd.DataFrame({'bar': len(expanded_date_series) * [bar_name]}, index=expanded_date_series)

    df_bar_grouped = pd.concat([df_exp_dates, df_bar_grouped], axis=1).fillna(0)
    df_bar_grouped.sort_index(ascending=False, inplace=True)

    df_bar_grouped[bar_name] = moving_weighted_ratings(df_bar_grouped)
    return df_bar_grouped
    
bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', 'Earl_s_Beer_and_Cheese', 'first_of_month', 'rating').head(10)


# In[31]:


all_date_series = expand_dates(df_ta_bar_reviews, False, 'first_of_month')

df_all_dates = pd.DataFrame({'p_holder': np.zeros(len(all_date_series))}, index=all_date_series)
df_all_dates.sort_index(ascending=False, inplace=True)
# df_all_dates.head()

for b in df_ta_bar_reviews.bar.unique():
    df_bar_moving_weighted_ratings = bar_moving_weighted_ratings_macro_date_range(df_ta_bar_reviews, 'bar', b, 'first_of_month', 'rating')
    print(b, '\t', df_bar_moving_weighted_ratings[b][0])
    df_all_dates = pd.concat([df_all_dates, df_bar_moving_weighted_ratings[b]], axis=1).fillna(0)


# In[32]:


df_all_dates.sort_index(ascending=False, inplace=True)
df_all_dates.head()


# In[33]:


# df_all_dates.to_excel('ta_bar_reviews_weighted_moving_avg_macro_date_range.xlsx')


# In[35]:


df_all_dates.Broome_Street_Bar.plot()


# ## some bars go for several months w/o reviews, perhaps suspend rating after n-months

# In[ ]:


# In[ ]: