#!/usr/bin/env python
# coding: utf-8

# # Giotto-Time
#
# Welcome to `giotto-time`, our new library for time series forecasting!
#
# Let's start with an example.

# ## First example

# ### Ingredients
#
# These are the main ingredients of `giotto-time`:

# In[1]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from gtime.preprocessing import TimeSeriesPreparation
from gtime.compose import FeatureCreation
from gtime.feature_extraction import Shift, MovingAverage
from gtime.feature_generation import PeriodicSeasonal, Constant, Calendar
from gtime.model_selection import horizon_shift, FeatureSplitter
from gtime.forecasting import GAR


# - `TimeSeriesPreparation`: checks the input format of the time series and converts it to the expected format
# - `DataFrameTransformer`: a wrapper around scikit-learn's `ColumnTransformer` that returns a `DataFrame`
# - `Shift`, `MovingAverage`: create the desired features on the time series for forecasting
# - `FeatureSplitter`: prepares the custom `giotto-time` train-test matrices that are used in the model
# - `GAR`: Generalized Auto Regressive model, a wrapper around scikit-learn's `MultiOutputRegressor`. This is the only time series forecasting model available in the first release.

# We also need a scikit-learn regression model. We go for a standard `LinearRegression` in this example.

# In[2]:


from sklearn.linear_model import LinearRegression


# ### Data
#
# We use the `pandas.util.testing` module to create a synthetic time series for testing.

# In[3]:


def test_time_series():
    from pandas.util import testing as testing

    testing.N, testing.K = 500, 1
    df = testing.makeTimeDataFrame(freq="D")
    return df


# In[4]:


time_series = test_time_series()
print(f'Time series shape: {time_series.shape}')
print(f'Time series index type: {time_series.index.__class__}')


# ### Time Series Preparation
#
# The input time series has to be a `DataFrame` with a `PeriodIndex`. Use the provided class `TimeSeriesPreparation` to convert the time series into this format.

# In[5]:


time_series_preparation = TimeSeriesPreparation()


# In[6]:


period_index_time_series = time_series_preparation.transform(time_series)


# In[7]:


print(f'Time series index type after the preprocessing: {period_index_time_series.index.__class__}')


# In[8]:


period_index_time_series.plot(figsize=(20, 5))
plt.show()


# ### Feature extraction
#
# The feature extraction step follows the scikit-learn transformer paradigm, adapted to a time series forecasting perspective.
#
# Our `DataFrameTransformer` inherits from scikit-learn's `ColumnTransformer`; it creates a feature `DataFrame` with the provided transformers.
#
# For simplicity we will create only `Shift` and `MovingAverage` features.
#
# `Shift` provides a temporal shift of the time series. Adding two `Shift` features (by 1 and 2) is equivalent to an `AR(2)` model.
#
# Since the `DataFrameTransformer` is a `ColumnTransformer` wrapper, you can easily include features from `scikit-learn`, `tsfresh`, topological features from `giotto-tda` (\o/) or your own custom features.
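# For intuition, here is a rough pandas-only sketch of what the two features compute
# on the prepared series (the exact output column names and window alignment produced
# by the `gtime` transformers may differ):

period_index_time_series.assign(
    shift_1=period_index_time_series.iloc[:, 0].shift(1),        # ~ Shift(1)
    shift_2=period_index_time_series.iloc[:, 0].shift(2),        # ~ Shift(2)
    ma_3=period_index_time_series.iloc[:, 0].rolling(3).mean(),  # ~ MovingAverage(window_size=3)
).head(6)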
# In[9]:


cal = Calendar(
    start_date="ignored",
    end_date="ignored",
    region="america",
    country="Brazil",
    kernel=np.array([0, 1]),
)

# New API
dft = FeatureCreation(
    [('s1', Shift(1), ['time_series']),
     ('s2', Shift(2), ['time_series']),
     ('ma3', MovingAverage(window_size=3), ['time_series']),
     # ('cal', cal, ['time_series']),
     # ('ct', Constant(2), ['time_series']),
    ])


# In[10]:


X = dft.fit_transform(period_index_time_series)
X.head(6)


# In[11]:


y = horizon_shift(period_index_time_series, horizon=3)
y.head()


# ### Train-Test split
#
# We use `FeatureSplitter` to split the matrices X and y into train and test.

# In[12]:


feature_splitter = FeatureSplitter()


# In[13]:


X_train, y_train, X_test, y_test = feature_splitter.transform(X, y)


# ### Training
#
# We rewrapped scikit-learn's `MultiOutputRegressor` as the `GAR` (Generalized Auto Regressive) model to better fit time series forecasting frameworks.
#
# The traditional *AR* model is equivalent to a `GAR` model that uses only `Shift` columns in the `X` matrix.
# `GAR` supports all the features compatible with the feature extraction step.
#
# *AR*: https://en.wikipedia.org/wiki/Autoregressive_model

# In[14]:


lr = LinearRegression()
model = GAR(lr)


# In[15]:


model = model.fit(X_train, y_train)


# ### Forecasting
#
# We forecast 3 time steps of the time series (we set this parameter through the `horizon` argument of `horizon_shift`).
#
# The format of the output is the following:
# - the index is the time step at which the prediction is made
# - the column `y_1` is the prediction one time step ahead, and so on for `y_2` and `y_3`

# In[16]:


predictions = model.predict(X_test)


# In[17]:


predictions
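# As a rough sketch of one way to visualise the output (assuming `predictions` keeps
# the daily `PeriodIndex` of `X_test`): the last row of `predictions` holds `y_1`,
# `y_2`, `y_3`, i.e. the forecasts for the next three periods, so we can place them
# on a new `PeriodIndex` and plot them next to the tail of the observed series.

last_step = predictions.index[-1]
forecast_index = pd.period_range(start=last_step + 1, periods=3, freq=last_step.freq)
forecast = pd.Series(predictions.iloc[-1].values, index=forecast_index)

period_index_time_series.iloc[-50:, 0].plot(figsize=(20, 5), label='observed')
forecast.plot(style='--o', label='3-step forecast')
plt.legend()
plt.show()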