%matplotlib inline import matplotlib.pyplot as plt import zipline from zipline.api import ( add_history, history, symbol, record, order_target_percent ) from zipline.utils.factory import load_bars_from_yahoo from zipline import TradingAlgorithm import pytz import numpy as np import pandas as pd import statsmodels.api as sm # Historical Data from Yahoo data = load_bars_from_yahoo( stocks=('EOG', 'PXD', 'SPY'), start=pd.Timestamp('2011-01-01', tz='utc'), end=pd.Timestamp.utcnow(), indexes={} ) prices = data.minor_xs('price') log_returns = np.log(prices).diff() compound_returns = (1 + log_returns).cumprod() compound_returns.plot(figsize=(14, 6)) def adf_test(y, X, intercept=True, window=None): ''' Dicky-Fuller cointegration test returns the ols obj and the adf test. e.g. (ols, adf) ''' model = pd.ols(y=y, x=X, intercept=intercept, window=window) return model, sm.tsa.adfuller(model.resid) model, adf = adf_test(prices.EOG, prices.PXD, window=20) spread = prices.EOG - model.beta['x'] * prices.PXD t_stat = adf[0] critical_values = adf[4] print 'Test statistic: ', t_stat print 'Critical Values:', critical_values spread.plot(figsize=(14,4)) # Trading Algorithm import numpy as np import statsmodels.api as sm def initialize(context): context.x = symbol('EOG') context.y = symbol('PXD') context.nobs = 20 add_history(context.nobs, '1d', 'price') context.tick = 0 def handle_data(context, data): # Allow the history frame to accumulate data context.tick += 1 if context.tick < context.nobs: return prices = np.log(history(context.nobs, '1d', 'price')) y = prices[context.y] X = prices[context.x] model = sm.OLS(y, sm.add_constant(X)).fit() hedge_ratio = model.params[context.x] spread = y - hedge_ratio * X zscore = (spread.iloc[-1] - spread.mean()) / spread.std() record( hedge_ratio=hedge_ratio, spread=spread.iloc[-1], zscore=zscore ) # Get direction of the trades, and place orders # Re-hedge to a dollar-neutral position daily in the direction of anticipated reversion. # More traditionally an entry is made when the magnitude of the z-score is large. side = np.copysign(0.5, zscore) order_target_percent(context.y, -side) order_target_percent(context.x, side * model.params[context.x]) algo = TradingAlgorithm( initialize=initialize, handle_data=handle_data ) perf = algo.run(data) perf.index = perf.index.tz_localize(pytz.utc) perf.spread.plot(figsize=(14,7)) compound_pair_returns = (1 + perf.returns).cumprod() results = compound_returns.to_dict() results['EOG ~ PXD'] = compound_pair_returns results = pd.DataFrame(results).ffill() results[['PXD', 'EOG', 'EOG ~ PXD']].plot(figsize=(14,7)) results[['SPY', 'EOG ~ PXD']].plot(figsize=(14,7)) perf[['long_exposure', 'short_exposure', 'net_leverage']].plot(figsize=(14,7)) risk_report = pd.Panel({ period: pd.DataFrame(algo.risk_report[period]) for period in algo.risk_report }) print risk_report.minor_axis risk_report.minor_xs('sharpe') import statsmodels.formula.api as smf model = smf.ols(formula="EOG ~ PXD", data=np.log(prices)).fit() model.summary()