# Loads DJIA, S&P 500 and "debt" search-volume series, joins them into one
# frame, and derives buy/sell signals by comparing each search-volume value
# with a lagged rolling mean of the preceding delta_t observations.
import datetime

import numpy as np
import pandas as pd
import pandas.io.data

# Window length (in rows) of the rolling mean the search volume is compared to.
delta_t = 3

# --- Load price data --------------------------------------------------------
djia = pd.DataFrame.from_csv('DJIA.csv', index_col='DATE').sort()
sp500 = pd.DataFrame(
    pd.io.data.DataReader(
        "^GSPC",
        "yahoo",
        start=datetime.datetime(2004, 1, 1),
        end=datetime.datetime(2011, 4, 1),
    )['Adj Close'],
    columns=['sp500'],
)
#sp500 = sp500.tshift(1, freq='B')

# --- Load the newly retrieved search-volume data ----------------------------
debt = pd.read_csv('debt_google_trend.csv', index_col='date')

djia.index = djia.index.to_datetime()
djia.rename(columns={'VALUE': 'djia'}, inplace=True)

debt.index = debt.index.to_datetime()
debt = debt.tshift(2, freq='D')  # Moves Saturday to Monday

# --- Load Preis data --------------------------------------------------------
df = pd.read_csv('PreisMoatStanley_ScientificReports_3_1684_2013.dat',
                 delimiter=' ')
columns = ['Google Start Date', 'Google End Date', 'debt', 'DJIA Date',
           'DJIA Closing Price']
df = df[columns]

# Rename on a fresh frame instead of in place, so the column subset taken
# from `df` is never mutated through a slice.
debt_from_study = df[['debt', 'DJIA Closing Price']].rename(
    columns={'debt': 'debt_preis', 'DJIA Closing Price': 'djia_preis'})
debt_from_study.index = df['Google End Date']
debt_from_study.index = debt_from_study.index.to_datetime()
debt_from_study = debt_from_study.tshift(2, freq='D')  # Move from Sat to Monday
debt_from_study.head()

# --- Join together into single dataframe ------------------------------------
# Only rows present in all three sources survive the dropna().
# NOTE(review): `djia` is loaded above but is not part of this join.
data = pd.concat([debt, sp500, debt_from_study], axis=1).dropna()
data.head()

data.debt.plot()
data.debt_preis.plot(secondary_y=True)

# Rolling mean over delta_t rows, lagged by one row so that each value is
# compared against the mean of the rows before it.
data['rolling_mean'] = pd.rolling_mean(data.debt, delta_t).shift(1)
data['rolling_mean_preis'] = pd.rolling_mean(data.debt_preis, delta_t).shift(1)
data.head(10)

# --- Trading signal from the newly retrieved search data --------------------
# NOTE(review): 'rolling_mean' is already lagged by one row above and is
# shifted once more here, whereas the 'order_preis' signal below uses the
# once-lagged mean directly -- confirm the extra lag is intended.
data['order'] = 0
data['order'][data.debt > data.rolling_mean.shift(1)] = -1  # Shorting if cross-over
data['order'][data.debt < data.rolling_mean.shift(1)] = 1  # Buying otherwise.
data['order'].ix[:delta_t] = 0  # No position for the first delta_t rows.

# --- Trading signal from the Preis et al. search data -----------------------
# NOTE(review): starts from 1 (not 0 as for 'order'), so rows where the value
# equals its rolling mean stay at 1 here but at 0 above -- confirm.
data['order_preis'] = 1
data['order_preis'][data.debt_preis > data.rolling_mean_preis] = -1  # Shorting if cross-over
data['order_preis'][data.debt_preis < data.rolling_mean_preis] = 1  # Buying otherwise.
# No position for the first delta_t rows of the Preis-based signal
# (presumably because the lagged rolling mean is not yet defined there).
preis_signal = data['order_preis']
preis_signal.ix[:delta_t] = 0

# Share of rows on which both signals agree.
signal_overlap = (data['order'] == data['order_preis']).mean()
print("%s %s" % ("% of overlapping trading signals from Preis debt data and newly retrieved search data: ", signal_overlap))
data.head(10)

# Log prices at the current row and at the following row; the following-row
# series is NaN on the last row because there is nothing to shift in.
log_djia_now = np.log(data['djia_preis'])
log_djia_next = np.log(data['djia_preis'].shift(-1))
log_sp_now = np.log(data['sp500'])
log_sp_next = np.log(data['sp500'].shift(-1))

# Compute log returns using djia and debt search volume from Preis et al.
data['log_returns'] = (data['order_preis'] * log_djia_next
                       - data['order_preis'] * log_djia_now)

# Compute log returns using S&P500 (still driven by the Preis signal).
data['log_returns_sp'] = (data['order_preis'] * log_sp_next
                          - data['order_preis'] * log_sp_now)

# Compute log returns using debt search volume recently extracted from trends.
data['log_returns_new_data'] = (data['order'] * log_djia_next
                                - data['order'] * log_djia_now)


def _plot_cumulative_return(column):
    """Plot the cumulative simple return implied by a log-return column."""
    # NOTE(review): `plt` is not imported in this file and `figsize` is called
    # on it; presumably both come from an IPython pylab session -- confirm.
    plt.figsize(10, 3)
    cumulative = np.exp(data[column].cumsum()) - 1
    cumulative.plot()


_plot_cumulative_return('log_returns')

# Figure from the published article, presumably shown for visual comparison
# with the plot above.
from IPython.core.display import Image
Image("http://www.nature.com/srep/2013/130425/srep01684/carousel/srep01684-f2.jpg")

_plot_cumulative_return('log_returns_new_data')

_plot_cumulative_return('log_returns_sp')