This notebook contains material from Controlling Natural Watersheds; content is available on Github.
%matplotlib inline
# ARMA example using sunpots data
import numpy as np
from scipy import stats
import pandas
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
print(sm.datasets.sunspots.NOTE)
dta = sm.datasets.sunspots.load_pandas().data
dta.index = pandas.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
del dta["YEAR"]
dta.plot(figsize=(12,8));
:: Number of Observations - 309 (Annual 1700 - 2008) Number of Variables - 1 Variable name definitions:: SUNACTIVITY - Number of sunspots for each year The data file contains a 'YEAR' variable that is not returned by load.
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)
# <codecell>
arma_mod20 = sm.tsa.ARMA(dta, (2,0)).fit()
print(arma_mod20.params)
print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
const 49.659343 ar.L1.SUNACTIVITY 1.390656 ar.L2.SUNACTIVITY -0.688571 dtype: float64 2622.6363380639814 2637.56970317 2628.60672591
# <codecell>
arma_mod30 = sm.tsa.ARMA(dta, (3,0)).fit()
# <codecell>
print(arma_mod30.params)
# <codecell>
print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)
const 49.749900 ar.L1.SUNACTIVITY 1.300810 ar.L2.SUNACTIVITY -0.508093 ar.L3.SUNACTIVITY -0.129649 dtype: float64 2619.4036286966957 2638.07033508 2626.8666135
# <markdowncell>
# * Does our model obey the theory?
# <codecell>
sm.stats.durbin_watson(arma_mod30.resid.values)
# <codecell>
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax);
resid = arma_mod30.resid
stats.normaltest(resid)
NormaltestResult(statistic=49.84502837307943, pvalue=1.5006852489043021e-11)
# <codecell>
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)
# <codecell>
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
# <codecell>
r,q,p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
data = np.c_[range(1,41), r[1:], q, p]
table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print(table.set_index('lag'))
AC Q Prob(>Q) lag 1.0 0.009179 0.026286 8.712033e-01 2.0 0.041793 0.573043 7.508710e-01 3.0 -0.001335 0.573602 9.024480e-01 4.0 0.136089 6.408918 1.706205e-01 5.0 0.092468 9.111822 1.046862e-01 6.0 0.091948 11.793234 6.674370e-02 7.0 0.068748 13.297190 6.519008e-02 8.0 -0.015020 13.369218 9.976168e-02 9.0 0.187592 24.641899 3.393923e-03 10.0 0.213718 39.321985 2.229482e-05 11.0 0.201082 52.361130 2.344956e-07 12.0 0.117182 56.804183 8.574277e-08 13.0 -0.014055 56.868320 1.893906e-07 14.0 0.015398 56.945559 3.997667e-07 15.0 -0.024967 57.149314 7.741483e-07 16.0 0.080916 59.296763 6.872181e-07 17.0 0.041138 59.853731 1.110947e-06 18.0 -0.052021 60.747422 1.548436e-06 19.0 0.062496 62.041685 1.831647e-06 20.0 -0.010301 62.076972 3.381250e-06 21.0 0.074453 63.926648 3.193592e-06 22.0 0.124955 69.154766 8.978368e-07 23.0 0.093162 72.071031 5.799790e-07 24.0 -0.082152 74.346683 4.713022e-07 25.0 0.015695 74.430039 8.289052e-07 26.0 -0.025037 74.642898 1.367285e-06 27.0 -0.125861 80.041145 3.722568e-07 28.0 0.053225 81.009978 4.716283e-07 29.0 -0.038693 81.523804 6.916637e-07 30.0 -0.016904 81.622223 1.151662e-06 31.0 -0.019296 81.750935 1.868767e-06 32.0 0.104990 85.575063 8.927962e-07 33.0 0.040086 86.134564 1.247509e-06 34.0 0.008829 86.161808 2.047826e-06 35.0 0.014588 86.236445 3.263808e-06 36.0 -0.119329 91.248896 1.084454e-06 37.0 -0.036665 91.723865 1.521923e-06 38.0 -0.046193 92.480514 1.938734e-06 39.0 -0.017768 92.592883 2.990678e-06 40.0 -0.006220 92.606706 4.696982e-06
# <markdowncell>
# * This indicates a lack of fit.
# <markdowncell>
# * In-sample dynamic prediction. How good does our model do?
# <codecell>
predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print predict_sunspots
# <codecell>
ax = dta.ix['1950':].plot(figsize=(12,8))
ax = predict_sunspots.plot(ax=ax, style='r--', label='Dynamic Prediction');
ax.legend();
ax.axis((-20.0, 38.0, -4.0, 200.0));
# <codecell>
def mean_forecast_err(y, yhat):
return y.sub(yhat).mean()
# <codecell>
mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)
# <headingcell level=3>
# Exercise: Can you obtain a better fit for the Sunspots model? (Hint: sm.tsa.AR has a method select_order)
# <headingcell level=3>
# Simulated ARMA(4,1): Model Identification is Difficult
# <codecell>
from statsmodels.tsa.arima_process import arma_generate_sample, ArmaProcess
# <codecell>
np.random.seed(1234)
# include zero-th lag
arparams = np.array([1, .75, -.65, -.55, .9])
maparams = np.array([1, .65])
# <markdowncell>
# * Let's make sure this models is estimable.
# <codecell>
arma_t = ArmaProcess(arparams, maparams)
# <codecell>
arma_t.isinvertible()
# <codecell>
arma_t.isstationary()
# <rawcell>
# * What does this mean?
# <codecell>
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax.plot(arma_t.generate_sample(size=50));
# <codecell>
arparams = np.array([1, .35, -.15, .55, .1])
maparams = np.array([1, .65])
arma_t = ArmaProcess(arparams, maparams)
arma_t.isstationary()
# <codecell>
arma_rvs = arma_t.generate_sample(size=500, burnin=250, scale=2.5)
# <codecell>
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(arma_rvs, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(arma_rvs, lags=40, ax=ax2)
# <rawcell>
# * For mixed ARMA processes the Autocorrelation function is a mixture of exponentials and damped sine waves after (q-p) lags.
# * The partial autocorrelation function is a mixture of exponentials and dampened sine waves after (p-q) lags.
# <codecell>
arma11 = sm.tsa.ARMA(arma_rvs, (1,1)).fit()
resid = arma11.resid
r,q,p = sm.tsa.acf(resid, qstat=True)
data = np.c_[range(1,41), r[1:], q, p]
table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print table.set_index('lag')
# <codecell>
arma41 = sm.tsa.ARMA(arma_rvs, (4,1)).fit()
resid = arma41.resid
r,q,p = sm.tsa.acf(resid, qstat=True)
data = np.c_[range(1,41), r[1:], q, p]
table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print table.set_index('lag')
# <headingcell level=3>
# Exercise: How good of in-sample prediction can you do for another series, say, CPI
# <codecell>
macrodta = sm.datasets.macrodata.load_pandas().data
macrodta.index = pandas.Index(sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3'))
cpi = macrodta["cpi"]
# <headingcell level=4>
# Hint:
# <codecell>
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax = cpi.plot(ax=ax);
ax.legend();
# <rawcell>
# P-value of the unit-root test, resoundly rejects the null of no unit-root.
# <codecell>
print sm.tsa.adfuller(cpi)[1]
Contact GitHub API Training Shop Blog About
© 2017 GitHub, Inc. Terms Privacy Security Status Help
File "<ipython-input-3-dc8da7271502>", line 50 print arma_mod20.params ^ SyntaxError: Missing parentheses in call to 'print'