#!/usr/bin/env python
# coding: utf-8
"""Greykite forecasting walkthrough, exported from a Jupyter notebook.

Two datasets are forecast (the Peyton Manning daily series shipped with
Greykite, and the monthly AirPassengers CSV).  For each dataset three
model configurations are fitted: SILVERKITE with its defaults, SILVERKITE
with a gradient-boosting fit algorithm, and PROPHET.  Every run shows the
backtest plot, optionally the component plot, and prints the
cross-validation summary table.
"""

from collections import defaultdict
import warnings

# Silence all warnings; done ahead of the third-party imports so that it
# also covers anything they emit while being imported.
warnings.filterwarnings("ignore")

import pandas as pd
import plotly
import greykite
from greykite.common.data_loader import DataLoader
from greykite.framework.templates.autogen.forecast_config import ForecastConfig, ComputationParam, ModelComponentsParam
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from fbprophet import Prophet


# Columns kept (and their order) in the cross-validation summary table.
CV_COLUMN_ORDER = (
    "rank",
    "mean_test",
    "split_test",
    "mean_train",
    "split_train",
    "mean_fit_time",
    "mean_score_time",
    "params",
)


def run_forecast(df: pd.DataFrame, metadata, model_template: str,
                 forecast_horizon: int, model_components_param=None,
                 coverage: float = 0.95):
    """Fit one forecast configuration and return the forecaster's result.

    Args:
        df: Input data containing the columns named in ``metadata``.
        metadata: ``MetadataParam`` describing the time column, value
            column and frequency of ``df``.
        model_template: Template name, e.g.
            ``ModelTemplateEnum.SILVERKITE.name``.
        forecast_horizon: Number of steps to forecast ahead.
        model_components_param: Optional ``ModelComponentsParam`` that
            customises the template; ``None`` keeps the template defaults.
        coverage: Prediction-interval coverage (0.95 means 95% intervals).

    Returns:
        Whatever ``Forecaster.run_forecast_config`` returns; the callers
        below read its ``backtest``, ``forecast`` and ``grid_search``
        attributes.
    """
    forecaster = Forecaster()
    # The result is also stored as `forecaster.forecast_result`.
    return forecaster.run_forecast_config(
        df=df,
        config=ForecastConfig(
            model_template=model_template,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            metadata_param=metadata,
            model_components_param=model_components_param,
        ),
    )


def show_backtest(result) -> None:
    """Plot the backtest stored on ``result`` and display it via plotly."""
    fig = result.backtest.plot()
    plotly.io.show(fig)


def show_components(result, use_plotly: bool = True) -> None:
    """Plot the forecast components stored on ``result`` and display them.

    Args:
        result: A forecast result as returned by :func:`run_forecast`.
        use_plotly: Display through ``plotly.io.show`` when true.  Pass
            ``False`` for the "PROPHET" template, whose component figure
            is displayed with its own ``show()`` method instead
            (presumably it is not a plotly figure -- TODO confirm).
    """
    fig = result.forecast.plot_components()
    if use_plotly:
        plotly.io.show(fig)
    else:
        fig.show()


def summarize_cv(result) -> pd.DataFrame:
    """Build the transposed cross-validation summary for ``result``.

    Returns:
        The grid-search summary indexed by the stringified ``params``
        column and then transposed, which saves space when printed.
    """
    cv_results = summarize_grid_search_results(
        grid_search=result.grid_search,
        decimals=2,
        # The below saves space in the printed output.
        # Remove to show all available metrics and columns.
        cv_report_metrics=None,
        column_order=list(CV_COLUMN_ORDER),
    )
    cv_results["params"] = cv_results["params"].astype(str)
    cv_results.set_index("params", drop=True, inplace=True)
    # Transposes to save space in the printed output.
    return cv_results.transpose()


# ## peyton_manning

# Loads dataset into pandas DataFrame
dl = DataLoader()
df = dl.load_peyton_manning()

# Specify dataset information
metadata = MetadataParam(
    time_col='ts',   # name of the time column
    value_col='y',   # name of the value column
    freq='D',        # "H" for hourly, "D" for daily, "W" for weekly, etc.
                     # Any format accepted by `pandas.date_range`
)

# ### SILVERKITE
result = run_forecast(
    df, metadata, ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=365,  # forecasts 365 steps ahead
)
show_backtest(result)
show_components(result)
# A bare `cv_results.transpose()` only renders inside a notebook; print it
# so the table is also visible when this file runs as a script.
print(summarize_cv(result))

# ### SILVERKITE(gradient_boosting)
result = run_forecast(
    df, metadata, ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=365,
    model_components_param=ModelComponentsParam(
        custom=dict(fit_algorithm_dict=dict(fit_algorithm="gradient_boosting"))
    ),
)
show_backtest(result)
print(summarize_cv(result))

# ### PROPHET
result = run_forecast(
    df, metadata, ModelTemplateEnum.PROPHET.name,
    forecast_horizon=365,
)
show_backtest(result)
show_components(result, use_plotly=False)
print(summarize_cv(result))


# ## AirPassengers

df = pd.read_csv('../data/AirPassengers.csv')
df['Month'] = pd.to_datetime(df['Month'])

# Specify dataset information
metadata = MetadataParam(
    time_col="Month",         # name of the time column
    value_col="#Passengers",  # name of the value column
    freq="MS",                # "H" for hourly, "D" for daily, "W" for weekly, etc.
                              # Any format accepted by `pandas.date_range`
)

# ### SILVERKITE
result = run_forecast(
    df, metadata, ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=12,  # forecasts 12 steps ahead
)
show_backtest(result)
show_components(result)
print(summarize_cv(result))

# ### SILVERKITE(gradient_boosting)
result = run_forecast(
    df, metadata, ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=12,
    model_components_param=ModelComponentsParam(
        custom=dict(fit_algorithm_dict=dict(fit_algorithm='gradient_boosting'))
    ),
)
show_backtest(result)
print(summarize_cv(result))

# ### PROPHET
result = run_forecast(
    df, metadata, ModelTemplateEnum.PROPHET.name,
    forecast_horizon=12,  # forecasts 12 steps ahead
    model_components_param=ModelComponentsParam(
        seasonality=dict(seasonality_mode='multiplicative')
    ),
)
show_backtest(result)
show_components(result, use_plotly=False)
print(summarize_cv(result))