import quandl
import warnings
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style('whitegrid')
sns.set_context('talk')
from statsmodels.tsa.seasonal import seasonal_decompose
from arima_utils import ad_fuller_test, plot_rolling_stats
from arima_utils import plot_acf_pacf, arima_gridsearch_cv
C:\Anaconda2\envs\python3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead. from pandas.core import datetools
# Matplotlib rc overrides: every text element is rendered 'x-large' and the
# default figure is 15x5 inches.
params = {
    rc_key: 'x-large'
    for rc_key in (
        'legend.fontsize',
        'axes.labelsize',
        'axes.titlesize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
}
params['figure.figsize'] = (15, 5)
# IPython magic (not plain Python): draw figures inline in the notebook output.
%matplotlib inline
# Push the rc overrides collected in `params` into matplotlib's global config.
# NOTE(review): this runs after the magic on purpose as far as I can tell --
# some IPython/ipykernel versions apply their own rc defaults when the inline
# backend is activated; confirm before reordering.
plt.rcParams.update(params)
# Blanket "ignore" filter: silences every warning category for the rest of
# the session.
warnings.filterwarnings("ignore")
# Download Quandl dataset BUNDESBANK/BBK01_WT5511, keeping observations up to
# and including 2017-07-31.
df = quandl.get("BUNDESBANK/BBK01_WT5511", end_date="2017-07-31")
print(df.shape)
# Reindex onto every calendar day between the first and last observation.
# The rows this introduces are NaN and take the most recent known value.
# ffill() is the direct replacement for fillna(method='ffill'), whose
# `method` argument is deprecated in current pandas releases.
new_df = df.reindex(
    pd.date_range(df.index.min(), df.index.max(), freq='D')
).ffill()
print(new_df.shape)
(12152, 1) (17550, 1)
# Line chart of the daily, gap-filled frame on a 15x6 inch figure.
new_df.plot(kind='line', figsize=(15, 6))
plt.show()
# Run statsmodels' seasonal_decompose on the 'Value' column and plot the
# resulting components. interpolate() fills any NaN still present first.
decompose = seasonal_decompose(new_df['Value'].interpolate())
decompose.plot()
# Stationarity checks on the untransformed series, using the arima_utils
# helpers: the ADF test report first, then the rolling-statistics plot.
ad_fuller_test(new_df['Value'])
plot_rolling_stats(new_df['Value'])
Test Statistic -0.364334 p-value 0.915886 #Lags Used 43.000000 Number of Observations Used 17506.000000 Critical Value (5%) -2.861705 Critical Value (10%) -2.566858 Critical Value (1%) -3.430724 dtype: float64
# Natural-log transform of the 'Value' column, followed by the same two
# stationarity checks on the transformed series.
log_series = np.log(new_df['Value'])
ad_fuller_test(log_series)
plot_rolling_stats(log_series)
Test Statistic -1.849748 p-value 0.356057 #Lags Used 29.000000 Number of Observations Used 17520.000000 Critical Value (5%) -2.861705 Critical Value (10%) -2.566858 Critical Value (1%) -3.430723 dtype: float64
# First difference of the log series (each value minus the previous one).
# Differencing leaves a NaN in the first position, which is dropped before
# the stationarity checks are repeated.
log_series_shift = log_series.diff().dropna()
ad_fuller_test(log_series_shift)
plot_rolling_stats(log_series_shift)
Test Statistic -23.917175 p-value 0.000000 #Lags Used 28.000000 Number of Observations Used 17520.000000 Critical Value (5%) -2.861705 Critical Value (10%) -2.566858 Critical Value (1%) -3.430723 dtype: float64
# Pass the log-differenced series to the arima_utils ACF/PACF plotting helper.
plot_acf_pacf(log_series_shift)
The log-differenced series appears to be stationary; the ACF and PACF plots are consistent with this.
# Keep both transformed series alongside the raw values; assignment aligns
# them on new_df's index.
new_df['log_series'] = log_series
new_df['log_series_shift'] = log_series_shift
# Search ARIMA orders with 5 cross-validation splits. The DataFrame column is
# passed (not the `log_series` variable) so the series is named 'log_series'.
results_dict = arima_gridsearch_cv(new_df['log_series'], cv_splits=5)
******************** Iteration 1 of 5 TRAIN: [ 0 1 2 ..., 2922 2923 2924] TEST: [2925 2926 2927 ..., 5847 5848 5849] Train shape:(2925,), Test shape:(2925,) ARIMA(0, 0, 0)- AIC:5358.675881541096 ARIMA(0, 0, 1)- AIC:1370.644173716044 ARIMA(0, 1, 0)- AIC:-17795.53995335306 ARIMA(0, 1, 1)- AIC:-17793.56497363464 ARIMA(1, 0, 0)- AIC:-17788.098388741855 ARIMA(1, 0, 1)- AIC:-17786.10419500408 ARIMA(1, 1, 0)- AIC:-17793.562143972762 ARIMA(1, 1, 1)- AIC:-17796.006063269502 Best Model params:(1, 1, 1) AIC:-17796.006063269502 ARIMA Model Results ============================================================================== Dep. Variable: D.log_series No. Observations: 2924 Model: ARIMA(1, 1, 1) Log Likelihood 8902.003 Method: css-mle S.D. of innovations 0.012 Date: Sat, 02 Sep 2017 AIC -17796.006 Time: 19:38:56 BIC -17772.083 Sample: 04-02-1968 HQIC -17787.390 - 04-03-1976 ====================================================================================== coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------------- const 0.0004 0.000 1.934 0.053 -5.59e-06 0.001 ar.L1.D.log_series -0.7385 0.120 -6.129 0.000 -0.975 -0.502 ma.L1.D.log_series 0.7649 0.115 6.668 0.000 0.540 0.990 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 -1.3540 +0.0000j 1.3540 0.5000 MA.1 -1.3073 +0.0000j 1.3073 0.5000 -----------------------------------------------------------------------------
0 count 2.924000e+03 mean 1.877305e-07 std 1.152510e-02 min -1.065016e-01 25% -1.970487e-03 50% -4.105400e-04 75% 1.345751e-03 max 1.024452e-01
******************** Iteration 2 of 5 TRAIN: [ 0 1 2 ..., 5847 5848 5849] TEST: [5850 5851 5852 ..., 8772 8773 8774] Train shape:(5850,), Test shape:(2925,) ARIMA(0, 0, 0)- AIC:15792.660349788819 ARIMA(0, 0, 1)- AIC:7814.097096459958 ARIMA(0, 1, 0)- AIC:-33512.28307448926 ARIMA(0, 1, 1)- AIC:-33516.96483489898 ARIMA(1, 0, 0)- AIC:-33502.698980625006 ARIMA(1, 0, 1)- AIC:-33507.007684098426 ARIMA(1, 1, 0)- AIC:-33516.56690977644 ARIMA(1, 1, 1)- AIC:-33517.28713826041 Best Model params:(1, 1, 1) AIC:-33517.28713826041 ARIMA Model Results ============================================================================== Dep. Variable: D.log_series No. Observations: 5849 Model: ARIMA(1, 1, 1) Log Likelihood 16762.644 Method: css-mle S.D. of innovations 0.014 Date: Sat, 02 Sep 2017 AIC -33517.287 Time: 19:39:01 BIC -33490.591 Sample: 04-02-1968 HQIC -33508.004 - 04-06-1984 ====================================================================================== coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------------- const 0.0004 0.000 2.310 0.021 5.96e-05 0.001 ar.L1.D.log_series 0.3223 0.188 1.715 0.086 -0.046 0.691 ma.L1.D.log_series -0.3595 0.185 -1.943 0.052 -0.722 0.003 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 3.1027 +0.0000j 3.1027 0.0000 MA.1 2.7820 +0.0000j 2.7820 0.0000 -----------------------------------------------------------------------------
0 count 5.849000e+03 mean -1.233252e-07 std 1.377704e-02 min -1.640484e-01 25% -2.716841e-03 50% -4.121953e-04 75% 2.374837e-03 max 1.249847e-01
******************** Iteration 3 of 5 TRAIN: [ 0 1 2 ..., 8772 8773 8774] TEST: [ 8775 8776 8777 ..., 11697 11698 11699] Train shape:(8775,), Test shape:(2925,) ARIMA(0, 0, 0)- AIC:22924.700928334434 ARIMA(0, 0, 1)- AIC:10939.921256251526 ARIMA(0, 1, 0)- AIC:-52348.468958811376 ARIMA(0, 1, 1)- AIC:-52365.886422164316 ARIMA(1, 0, 0)- AIC:-52340.135707524285 ARIMA(1, 0, 1)- AIC:-52357.13694220425 ARIMA(1, 1, 0)- AIC:-52364.9954035126 ARIMA(1, 1, 1)- AIC:-52366.212486508346 Best Model params:(1, 1, 1) AIC:-52366.212486508346 ARIMA Model Results ============================================================================== Dep. Variable: D.log_series No. Observations: 8774 Model: ARIMA(1, 1, 1) Log Likelihood 26187.106 Method: css-mle S.D. of innovations 0.012 Date: Sat, 02 Sep 2017 AIC -52366.212 Time: 19:39:07 BIC -52337.894 Sample: 04-02-1968 HQIC -52356.564 - 04-09-1992 ====================================================================================== coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------------- const 0.0002 0.000 2.037 0.042 9.47e-06 0.000 ar.L1.D.log_series 0.2325 0.142 1.643 0.100 -0.045 0.510 ma.L1.D.log_series -0.2810 0.139 -2.014 0.044 -0.554 -0.008 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 4.3002 +0.0000j 4.3002 0.0000 MA.1 3.5588 +0.0000j 3.5588 0.0000 -----------------------------------------------------------------------------
0 count 8.774000e+03 mean -8.298166e-08 std 1.223435e-02 min -1.650598e-01 25% -2.567130e-03 50% -2.630339e-04 75% 2.402586e-03 max 1.256746e-01
******************** Iteration 4 of 5 TRAIN: [ 0 1 2 ..., 11697 11698 11699] TEST: [11700 11701 11702 ..., 14622 14623 14624] Train shape:(11700,), Test shape:(2925,) ARIMA(0, 0, 0)- AIC:28323.593243498864 ARIMA(0, 0, 1)- AIC:12343.352798126089 ARIMA(0, 1, 0)- AIC:-72092.36827299207 ARIMA(0, 1, 1)- AIC:-72119.82137921233 ARIMA(1, 0, 0)- AIC:-72085.08760862946 ARIMA(1, 0, 1)- AIC:-72112.18893141265 ARIMA(1, 1, 0)- AIC:-72118.75588542139 ARIMA(1, 1, 1)- AIC:-72119.86496021658 Best Model params:(1, 1, 1) AIC:-72119.86496021658 ARIMA Model Results ============================================================================== Dep. Variable: D.log_series No. Observations: 11699 Model: ARIMA(1, 1, 1) Log Likelihood 36063.932 Method: css-mle S.D. of innovations 0.011 Date: Sat, 02 Sep 2017 AIC -72119.865 Time: 19:39:14 BIC -72090.396 Sample: 04-02-1968 HQIC -72109.967 - 04-12-2000 ====================================================================================== coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------------- const 0.0002 9.61e-05 1.778 0.075 -1.75e-05 0.000 ar.L1.D.log_series 0.1907 0.124 1.534 0.125 -0.053 0.434 ma.L1.D.log_series -0.2417 0.123 -1.969 0.049 -0.482 -0.001 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- AR.1 5.2438 +0.0000j 5.2438 0.0000 MA.1 4.1379 +0.0000j 4.1379 0.0000 -----------------------------------------------------------------------------
0 count 1.169900e+04 mean -6.019278e-08 std 1.109183e-02 min -1.652707e-01 25% -2.322444e-03 50% -1.811495e-04 75% 2.032718e-03 max 1.259057e-01
******************** Iteration 5 of 5 TRAIN: [ 0 1 2 ..., 14622 14623 14624] TEST: [14625 14626 14627 ..., 17547 17548 17549] Train shape:(14625,), Test shape:(2925,) ARIMA(0, 0, 0)- AIC:34336.88986620506 ARIMA(0, 0, 1)- AIC:14355.652400375791 ARIMA(0, 1, 0)- AIC:-91312.75496091215 ARIMA(0, 1, 1)- AIC:-91348.77150649179 ARIMA(1, 0, 0)- AIC:-91301.13184735598 ARIMA(1, 0, 1)- AIC:-91336.41118255563 ARIMA(1, 1, 0)- AIC:-91347.65810577202 ARIMA(1, 1, 1)- AIC:-91348.2994538057 Best Model params:(0, 1, 1) AIC:-91348.77150649179 ARIMA Model Results ============================================================================== Dep. Variable: D.log_series No. Observations: 14624 Model: ARIMA(0, 1, 1) Log Likelihood 45677.386 Method: css-mle S.D. of innovations 0.011 Date: Sat, 02 Sep 2017 AIC -91348.772 Time: 19:39:24 BIC -91326.000 Sample: 04-02-1968 HQIC -91341.207 - 04-15-2008 ====================================================================================== coef std err z P>|z| [0.025 0.975] -------------------------------------------------------------------------------------- const 0.0002 8.35e-05 2.620 0.009 5.51e-05 0.000 ma.L1.D.log_series -0.0517 0.008 -6.182 0.000 -0.068 -0.035 Roots ============================================================================= Real Imaginary Modulus Frequency ----------------------------------------------------------------------------- MA.1 19.3458 +0.0000j 19.3458 0.0000 -----------------------------------------------------------------------------
0 count 1.462400e+04 mean -3.473076e-08 std 1.064827e-02 min -1.656143e-01 25% -2.387169e-03 50% -2.301705e-04 75% 2.207817e-03 max 1.258538e-01