#!/usr/bin/env python
# coding: utf-8

# # Giotto-Time
#
# Welcome to `giotto-time`, our new library for time series forecasting!
#
# Let's start with an example.

# ## First example

# ### Ingredients
#
# These are the main ingredients of `giotto-time`:

# In[1]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from gtime.preprocessing import TimeSeriesPreparation
from gtime.compose import FeatureCreation
from gtime.feature_extraction import Shift, MovingAverage
from gtime.feature_generation import PeriodicSeasonal, Constant, Calendar
from gtime.model_selection import horizon_shift, FeatureSplitter
from gtime.forecasting import GAR


# - `TimeSeriesPreparation`: checks the input format of the time series and converts it to the expected format
# - `DataFrameTransformer`: a wrapper around scikit-learn's `ColumnTransformer` that returns a `DataFrame`
# - `Shift`, `MovingAverage`: create the desired features on the time series for forecasting
# - `FeatureSplitter`: prepares the custom `giotto-time` train-test matrices that are used in the model
# - `GAR`: Generalized Auto Regressive model, a wrapper around scikit-learn's `MultiOutputRegressor`. This is the only time series forecasting model available in the first release.

# We also need a scikit-learn regression model. We go for a standard `LinearRegression` in this example.

# In[2]:


from sklearn.linear_model import LinearRegression


# ### Data
#
# We use the `pandas.util.testing` module to create a synthetic time series for testing.

# In[3]:


def test_time_series():
    from pandas.util import testing as testing

    testing.N, testing.K = 500, 1
    df = testing.makeTimeDataFrame(freq="D")
    return df


# In[4]:


time_series = test_time_series()
print(f'Time series shape: {time_series.shape}')
print(f'Time series index type: {time_series.index.__class__}')


# ### Time Series Preparation
#
# The input time series has to be a `DataFrame` with a `PeriodIndex`. Use the provided class `TimeSeriesPreparation` to convert the time series into this format.

# In[5]:


time_series_preparation = TimeSeriesPreparation()


# In[6]:


period_index_time_series = time_series_preparation.transform(time_series)


# In[7]:


print(f'Time series index type after the preprocessing: {period_index_time_series.index.__class__}')


# In[8]:


period_index_time_series.plot(figsize=(20, 5))
plt.show()


# ### Feature extraction
#
# The feature extraction step follows the scikit-learn transformer paradigm, adapted to a time series forecasting perspective.
#
# Our `DataFrameTransformer` inherits from scikit-learn's `ColumnTransformer`; it creates a feature `DataFrame` with the provided transformers.
#
# For simplicity we will create only `Shift` and `MovingAverage` features.
#
# `Shift` provides a temporal shift of the time series. Adding two `Shift` features (by 1 and 2) is equivalent to an `AR(2)` model.
#
# Since the `DataFrameTransformer` is a `ColumnTransformer` wrapper, you can easily include features from `scikit-learn`, `tsfresh`, topological features from `giotto-tda` (\o/) or your own custom features.
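# For intuition, here is a rough pandas-only sketch of what the two features compute
# on the prepared series (the exact output column names and window alignment produced
# by the `gtime` transformers may differ):

period_index_time_series.assign(
    shift_1=period_index_time_series.iloc[:, 0].shift(1),        # ~ Shift(1)
    shift_2=period_index_time_series.iloc[:, 0].shift(2),        # ~ Shift(2)
    ma_3=period_index_time_series.iloc[:, 0].rolling(3).mean(),  # ~ MovingAverage(window_size=3)
).head(6)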
# In[9]:


cal = Calendar(
    start_date="ignored",
    end_date="ignored",
    region="america",
    country="Brazil",
    kernel=np.array([0, 1]),
)

# New API
dft = FeatureCreation(
    [('s1', Shift(1), ['time_series']),
     ('s2', Shift(2), ['time_series']),
     ('ma3', MovingAverage(window_size=3), ['time_series']),
     # ('cal', cal, ['time_series']),
     # ('ct', Constant(2), ['time_series']),
    ])


# In[10]:


X = dft.fit_transform(period_index_time_series)
X.head(6)


# In[11]:


y = horizon_shift(period_index_time_series, horizon=3)
y.head()


# ### Train-Test split
#
# We use `FeatureSplitter` to split the matrices X and y into train and test.

# In[12]:


feature_splitter = FeatureSplitter()


# In[13]:


X_train, y_train, X_test, y_test = feature_splitter.transform(X, y)


# ### Training
#
# We rewrapped scikit-learn's `MultiOutputRegressor` as the `GAR` (Generalized Auto Regressive) model to better fit time series forecasting frameworks.
#
# The traditional *AR* model is equivalent to a `GAR` model that uses only `Shift` columns in the `X` matrix.
# `GAR` supports all the features compatible with the feature extraction step.
#
# *AR*: https://en.wikipedia.org/wiki/Autoregressive_model

# In[14]:


lr = LinearRegression()
model = GAR(lr)


# In[15]:


model = model.fit(X_train, y_train)


# ### Forecasting
#
# We forecast 3 time steps of the time series (we set this parameter through the `horizon` argument of `horizon_shift`).
#
# The format of the output is the following:
# - the index is the time step at which the prediction is made
# - the column `y_1` is the prediction one time step ahead, and so on for `y_2` and `y_3`

# In[16]:


predictions = model.predict(X_test)


# In[17]:


predictions
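# As a rough sketch of one way to visualise the output (assuming `predictions` keeps
# the daily `PeriodIndex` of `X_test`): the last row of `predictions` holds `y_1`,
# `y_2`, `y_3`, i.e. the forecasts for the next three periods, so we can place them
# on a new `PeriodIndex` and plot them next to the tail of the observed series.

last_step = predictions.index[-1]
forecast_index = pd.period_range(start=last_step + 1, periods=3, freq=last_step.freq)
forecast = pd.Series(predictions.iloc[-1].values, index=forecast_index)

period_index_time_series.iloc[-50:, 0].plot(figsize=(20, 5), label='observed')
forecast.plot(style='--o', label='3-step forecast')
plt.legend()
plt.show()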