#!/usr/bin/env python
# coding: utf-8

# # Cointegration Example in time series
# Example 3.1 from the book
# "Likelihood-Based-Inference-Cointegrated-Autoregressive-Econometrics"

# In[4]:

import datetime  # required below for the date index (missing in the original)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.vector_ar.var_model import VAR

get_ipython().run_line_magic('pylab', 'inline')
pylab.rcParams['figure.figsize'] = (18.0, 8.0)

# In[8]:

mu, sigma = 0, 1  # mean and standard deviation of the innovations
n = 10000         # sample size
s1 = np.random.normal(mu, sigma, n)
s2 = np.random.normal(mu, sigma, n)
s3 = np.random.normal(mu, sigma, n)

# In[9]:

# Sanity check: plot one white-noise series and its histogram.
s = s1
plt.figure(1)
plt.subplot(121)
plt.plot(s)
plt.subplot(122)
# `normed` was removed from matplotlib; `density` is the replacement.
plt.hist(s, 30, density=True)
plt.show()

# We will create the processes with the following properties:
#   x_1t is I(1)
#   x_2t is I(1)
#   x_3t is I(0)
# The cointegration vectors are: [a -1 0] and [0 0 1], so they satisfy
#   a*x_1t - x_2t + 0*x_3t ~ I(0)   and   0*x_1t - 0*x_2t + x_3t ~ I(0)

# In[10]:

a = 0.5
x_1t = np.cumsum(s1) + s2      # random walk + noise -> I(1)
x_2t = a * np.cumsum(s1) + s3  # shares the same stochastic trend -> I(1)
x_3t = s3                      # pure noise -> I(0)

todays_date = datetime.datetime.now().date()
index = pd.date_range(todays_date - datetime.timedelta(10), periods=n, freq='D')
y = pd.DataFrame(index=index, data={'col1': x_1t, 'col2': x_2t, 'col3': x_3t})
y.plot()

# ## Augmented Dickey Fuller test

# In[11]:


def ADF(v, crit='5%', max_d=6, reg='nc', autolag='AIC'):
    """Augmented Dickey-Fuller unit-root test.

    Parameters
    ----------
    v : ndarray
        1-D series to test.
    crit : str
        Key of the critical value to compare against ('1%', '5%' or '10%').
    max_d : int
        Maximum lag order passed to ``adfuller``.
    reg : str
        Regression type ('nc' = no constant; newer statsmodels spells it 'n').
    autolag : str
        Lag-selection criterion.

    Returns
    -------
    bool
        True when the unit-root hypothesis CANNOT be rejected at the chosen
        level (i.e. the series looks non-stationary); False when the test
        statistic falls below the critical value and the unit root is
        rejected.  (The original docstring claimed the opposite; the truth
        table here matches the original implementation.)
    """
    adf = adfuller(v, max_d, reg, autolag)
    print(adf)
    # adf[0] is the test statistic, adf[4] the dict of critical values.
    # Statistic below the critical value -> reject the unit root.
    return adf[0] >= adf[4][crit]


# # Testing Unit root

# In[12]:

print(ADF(x_1t))           # unit root hypothesis can't be rejected
print(ADF(np.diff(x_1t)))
print(ADF(x_2t))           # unit root hypothesis can't be rejected
print(ADF(np.diff(x_2t)))
print(ADF(x_3t))           # unit root rejected at the 95 per cent level -> I(0)

# ## Johansen function
# In[13]:


def get_johansen(y, p):
    """Run the Johansen trace test and attach the 95%-level results.

    Parameters
    ----------
    y : DataFrame or ndarray, shape (T, l)
        Multivariate time series to test.
    p : int
        Lag order passed to ``coint_johansen``.

    Returns
    -------
    The ``coint_johansen`` result object, extended with:
      r     -- number of cointegration relations at the 95% level (0 if none)
      evecr -- the first ``r`` eigenvectors (empty when ``r == 0``)
    """
    N, l = y.shape
    jres = coint_johansen(y, 0, p)
    trstat = jres.lr1   # trace statistics
    tsignf = jres.cvt   # critical values
    # BUG FIX: the original left `r` undefined (NameError) when no trace
    # statistic was significant; initialise it so the no-cointegration
    # case is handled safely.
    r = 0
    for i in range(l):
        if trstat[i] > tsignf[i, 1]:  # critical-value columns: 0=90%, 1=95%, 2=99%
            r = i + 1
    jres.r = r
    jres.evecr = jres.evec[:, :r]
    return jres


# In[14]:

p = 1
jres = get_johansen(y, p)
print("There are", jres.r, "cointegration vectors")
v1 = jres.evecr[:, 0]
v2 = jres.evecr[:, 1]
print(v1)
print(v2)
v3 = jres.evec[:, 2]  # v3 is not a cointegration vector

# Even though we were expecting [0.5 -1 0] and [0 0 1], these vectors span
# the same vector space.  Dividing every vector by its second component
# gives similar results.

# In[15]:

v1 / -v1[1]

# In[16]:

v2 / -v2[1]

# In[17]:

# Applying vector v1 yields an I(0) time series.
# (`as_matrix` was removed from pandas; `.values` is the replacement.)
plt.plot(np.dot(y.values, v1))

# In[18]:

# Applying vector v2 yields an I(0) time series.
plt.plot(np.dot(y.values, v2))

# In[19]:

# Applying vector v3 will NOT yield an I(0) time series, since it is not
# a cointegration vector.
plt.plot(np.dot(y.values, v3))