#!/usr/bin/env python
# coding: utf-8

# Notebook from [Forecast Website Traffic Using Facebook's Prophet Library](http://pbpython.com/prophet-overview.html)

# Note: The recommended way to install prophet is to use the following command:
#
#     conda install -c conda-forge fbprophet

# In[1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt

# Use ggplot style for aesthetic reasons
# In[2]:
get_ipython().run_line_magic('matplotlib', 'inline')  # NOTE: requires an IPython/Jupyter kernel
plt.style.use('ggplot')

# Read in the data file and store as a pandas dataframe
# In[3]:
data_file = "https://github.com/chris1610/pbpython/blob/master/data/All-Web-Site-Data-Audience-Overview.xlsx?raw=True"
df = pd.read_excel(data_file)
df.head()

# Check the data types to make sure the Day Index is a datetime type
# In[4]:
df.dtypes

# Do a simple plot
# In[5]:
df.set_index('Day Index').plot();

# Filter out the outlier traffic spike by marking it as missing data
# In[6]:
df.loc[(df['Sessions'] > 5000), 'Sessions'] = np.nan
df.set_index('Day Index').plot();

# More info on why we use log here - https://people.duke.edu/~rnau/411log.htm
# In[7]:
df['Sessions'] = np.log(df['Sessions'])
df.head()

# In[8]:
df.set_index('Day Index').plot();

# Rename the columns to comply with the prophet API (it expects 'ds' and 'y')
# In[9]:
df.columns = ["ds", "y"]
df.head()

# Create a prophet object and fit it to our data
# In[10]:
m1 = Prophet()
m1.fit(df)

# Create the future days we want to predict
# In[11]:
future1 = m1.make_future_dataframe(periods=365)
future1.tail()

# Predict the future!
# In[12]:
forecast1 = m1.predict(future1)

# Look at the values contained in the forecast dataframe
# In[13]:
forecast1.head()

# In[14]:
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# In[15]:
# Undo the log transform so the tail of the forecast is in real session counts
np.exp(forecast1[['yhat', 'yhat_lower', 'yhat_upper']].tail())

# Plot the forecast for the next year
# In[16]:
m1.plot(forecast1);

# In[17]:
m1.plot_components(forecast1);

# Create a dataframe with all the blog posts as well as the new one happening on March 6th, 2017.
# Each publish date is modeled as a "holiday" whose effect lasts 5 days (upper_window).
# In[18]:
articles = pd.DataFrame({
    'holiday': 'publish',
    'ds': pd.to_datetime(['2014-09-27', '2014-10-05', '2014-10-14', '2014-10-26', '2014-11-9',
                          '2014-11-18', '2014-11-30', '2014-12-17', '2014-12-29', '2015-01-06',
                          '2015-01-20', '2015-02-02', '2015-02-16', '2015-03-23', '2015-04-08',
                          '2015-05-04', '2015-05-17', '2015-06-09', '2015-07-02', '2015-07-13',
                          '2015-08-17', '2015-09-14', '2015-10-26', '2015-12-07', '2015-12-30',
                          '2016-01-26', '2016-04-06', '2016-05-16', '2016-06-15', '2016-08-23',
                          '2016-08-29', '2016-09-06', '2016-11-21', '2016-12-19', '2017-01-17',
                          '2017-02-06', '2017-02-21', '2017-03-06']),
    'lower_window': 0,
    'upper_window': 5,
})

# In[19]:
articles.head()

# Create a new model with the holidays defined
# In[20]:
m2 = Prophet(holidays=articles).fit(df)

# In[21]:
future2 = m2.make_future_dataframe(periods=90)

# In[22]:
forecast2 = m2.predict(future2)

# In[23]:
m2.plot(forecast2);

# In[24]:
m2.plot_components(forecast2);

# Create the final model using holidays and applying some bayesian sampling to handle the errors with the holidays.
# This will take a couple of minutes to run.
# In[25]:
m3 = Prophet(holidays=articles, mcmc_samples=500).fit(df)
future3 = m3.make_future_dataframe(periods=90)
forecast3 = m3.predict(future3)

# Convert the log values back to the "real world" values.
# In[26]:
# Undo the log transform applied earlier so the forecast columns are in
# real session counts, rounded to whole sessions.
forecast3["Sessions"] = np.exp(forecast3.yhat).round()
forecast3["Sessions_lower"] = np.exp(forecast3.yhat_lower).round()
forecast3["Sessions_upper"] = np.exp(forecast3.yhat_upper).round()
# Show the March 2017 window. Use unambiguous ISO-8601 dates: the original
# "3-5-2017"/"4-1-2017" strings relied on pandas' month-first parsing and read
# as day-first to many readers; these parse to the identical timestamps.
forecast3[(forecast3.ds > "2017-03-05") & (forecast3.ds < "2017-04-01")][["ds", "yhat", "Sessions_lower", "Sessions", "Sessions_upper"]]

# In[27]:
forecast3.to_excel("March-2017-forecast.xlsx")

# In[ ]: