%matplotlib inline
import matplotlib.pyplot as plt

import zipline
from zipline.api import (
    add_history,
    history,
    symbol,
    record,
    order_target_percent
)
from zipline.utils.factory import load_bars_from_yahoo
from zipline import TradingAlgorithm

import pytz
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Historical data from Yahoo: daily bars for EOG, PXD and SPY from
# 2011-01-01 up to the current UTC time.
data = load_bars_from_yahoo(
    stocks=('EOG', 'PXD', 'SPY'),
    start=pd.Timestamp('2011-01-01', tz='utc'),
    end=pd.Timestamp.utcnow(),
    indexes={}
)
# Price cross-section of the panel: one column per ticker.
prices = data.minor_xs('price')
# Daily log returns, ln(P_t / P_{t-1}); the first row is NaN.
log_returns = np.log(prices).diff()
# Log returns are additive, so the growth of one unit invested is
# exp(cumulative sum) == P_t / P_0.  Compounding them as (1 + r).cumprod()
# would treat them as simple returns and systematically understate growth.
compound_returns = np.exp(log_returns.cumsum())
compound_returns.plot(figsize=(14, 6))

def adf_test(y, X, intercept=True, window=None):
    '''
    Dickey-Fuller cointegration test.

    Fits ``pd.ols`` of ``y`` on ``X`` (``intercept`` and ``window`` are
    passed straight through) and runs ``sm.tsa.adfuller`` on the residuals
    of that fit.

    Returns the pair ``(ols, adf)``: the fitted OLS object and the
    ``adfuller`` result.
    '''
    ols = pd.ols(y=y, x=X, intercept=intercept, window=window)
    adf = sm.tsa.adfuller(ols.resid)
    return ols, adf

model, adf = adf_test(prices.EOG, prices.PXD, window=20)

spread = prices.EOG - model.beta['x'] * prices.PXD

t_stat = adf[0]
critical_values = adf[4]

print 'Test statistic: ', t_stat
print 'Critical Values:', critical_values

spread.plot(figsize=(14,4))

# Trading Algorithm
import numpy as np
import statsmodels.api as sm


def initialize(context):
    '''
    Store the strategy state on ``context``: the two legs of the pair
    (``x`` = EOG, ``y`` = PXD), the look-back length ``nobs`` and a bar
    counter ``tick`` starting at zero.
    '''
    lookback = 20

    context.tick = 0
    context.nobs = lookback
    context.x = symbol('EOG')
    context.y = symbol('PXD')
    # Register a daily price history of `lookback` bars for later
    # history() calls.
    add_history(lookback, '1d', 'price')


def handle_data(context, data):
    '''
    Per-bar handler: re-estimate the hedge ratio and re-target both legs.

    On each bar, once ``context.nobs`` bars have been seen:

    1. regress the log price of ``context.y`` on the log price of
       ``context.x`` (with a constant) over the last ``context.nobs`` bars;
    2. form the spread ``y - hedge_ratio * X`` and its z-score over the
       same window;
    3. record ``hedge_ratio``, the latest ``spread`` and ``zscore``;
    4. target ``-side`` of the portfolio in ``y`` and
       ``side * hedge_ratio`` in ``x``, where ``side`` is 0.5 carrying the
       sign of the z-score.

    ``data`` is not read here; prices come from ``history``.
    '''
    # Allow the history frame to accumulate data
    context.tick += 1
    if context.tick < context.nobs:
        return

    prices = np.log(history(context.nobs, '1d', 'price'))
    y = prices[context.y]
    X = prices[context.x]

    model = sm.OLS(y, sm.add_constant(X)).fit()
    hedge_ratio = model.params[context.x]

    spread = y - hedge_ratio * X
    zscore = (spread.iloc[-1] - spread.mean()) / spread.std()

    record(
        hedge_ratio=hedge_ratio,
        spread=spread.iloc[-1],
        zscore=zscore
    )

    # A degenerate window (zero or NaN standard deviation) yields a
    # non-finite z-score.  np.copysign would still return +/-0.5 for it, so
    # without this guard orders would be placed on an undefined signal.
    # Keep the current positions instead.
    if not np.isfinite(zscore):
        return

    # Get direction of the trades, and place orders.
    # Re-hedge daily in the direction of anticipated reversion: a positive
    # z-score shorts y and buys x, a negative one does the opposite.  The x
    # leg is scaled by the hedge ratio, so the book is dollar-neutral only
    # when that ratio is 1.
    # More traditionally an entry is made when the magnitude of the z-score
    # is large.
    side = np.copysign(0.5, zscore)
    order_target_percent(context.y, -side)
    order_target_percent(context.x, side * hedge_ratio)
    
    
algo = TradingAlgorithm(
    initialize=initialize,
    handle_data=handle_data
)

perf = algo.run(data)
perf.index = perf.index.tz_localize(pytz.utc)

perf.spread.plot(figsize=(14,7))

compound_pair_returns = (1 + perf.returns).cumprod()
results = compound_returns.to_dict()
results['EOG ~ PXD'] = compound_pair_returns
results = pd.DataFrame(results).ffill()
results[['PXD', 'EOG', 'EOG ~ PXD']].plot(figsize=(14,7))
results[['SPY', 'EOG ~ PXD']].plot(figsize=(14,7))

perf[['long_exposure', 'short_exposure', 'net_leverage']].plot(figsize=(14,7))

risk_report = pd.Panel({
    period: pd.DataFrame(algo.risk_report[period])
    for period in algo.risk_report
})
print risk_report.minor_axis          
risk_report.minor_xs('sharpe')

import statsmodels.formula.api as smf

# Full-sample OLS of log(EOG) on log(PXD) using the formula interface.
model = smf.ols(
    formula="EOG ~ PXD",
    data=np.log(prices)
).fit()

# Bare expression: shown only when evaluated as the last line of an
# interactive cell.
model.summary()