In [1]:

import pandas as pd
import numpy as np

Data used by researchers in the study:

In [2]:

!cat PreisMoatStanley_ScientificReports_3_1684_2013.dat | head -n 2

"Google Start Date" "Google End Date" "arts" "banking" "bonds" "bubble" "buy" "cancer" "car" "cash" "chance" "color" "conflict" "consume" "consumption" "crash" "credit" "crisis" "culture" "debt" "default" "derivatives" "dividend" "dow jones" "earnings" "economics" "economy" "energy" "environment" "fed" "finance" "financial markets" "fine" "fond" "food" "forex" "freedom" "fun" "gain" "gains" "garden" "gold" "greed" "growth" "happy" "headlines" "health" "hedge" "holiday" "home" "house" "housing" "inflation" "invest" "investment" "kitchen" "labor" "leverage" "lifestyle" "loss" "markets" "marriage" "metals" "money" "movie" "nasdaq" "nyse" "office" "oil" "opportunity" "ore" "politics" "portfolio" "present" "profit" "rare earths" "religion" "restaurant" "return" "returns" "revenue" "rich" "ring" "risk" "sell" "short sell" "short selling" "society" "stock market" "stocks" "success" "tourism" "trader" "train" "transaction" "travel" "unemployment" "war" "water" "world" "DJIA Date" "DJIA Closing Price"
2004-01-04 2004-01-10 0.956667 0.193333 0.203333 0.183333 2.41667 0.913333 4.19333 0.326667 0.0766667 1.12 0.12 0.00333333 0.0866667 0.31 2.19333 0.12 0.383333 0.21 0.23 0.02 0.04 0.0433333 0.05 0.183333 0.19 0.723333 0.206667 0.17 0.413333 0 0.383333 0.03 2.13333 0.02 0.29 0.606667 0.146667 0.04 0.85 0.936667 0.01 0.336667 0.426667 0.04 2.73333 0.0633333 0.886667 5.6 3.2 0.493333 0.0566667 0.0466667 0.363333 0.63 0.486667 0 0.0733333 0.503333 0.11 0.563333 0.08 1 4.85 0.09 0.06 2.74667 0.86 0.1 0.02 0.12 0.11 0.0966667 0.116667 0 0.293333 1.74667 0.6 0.07 0.293333 0.266667 0.93 0.236667 0.3 0 0 1.23667 0.126667 0.26 0.123333 0.343333 0.346667 0.706667 0.05 2.06 0.336667 2.02 1.91333 4.83333 2004-01-12 10485.18
cat: write error: Broken pipe

In [3]:

df = pd.read_csv('PreisMoatStanley_ScientificReports_3_1684_2013.dat', delimiter=' ')
columns = ['Google Start Date', 'Google End Date', 'debt', 'DJIA Date', 'DJIA Closing Price']
df = df[columns]
df.head()

Out[3]:

	Google Start Date	Google End Date	debt	DJIA Date	DJIA Closing Price
0	2004-01-04	2004-01-10	0.210000	2004-01-12	10485.18
1	2004-01-11	2004-01-17	0.210000	2004-01-20	10528.66
2	2004-01-18	2004-01-24	0.210000	2004-01-26	10702.51
3	2004-01-25	2004-01-31	0.213333	2004-02-02	10499.18
4	2004-02-01	2004-02-07	0.200000	2004-02-09	10579.03

In [4]:

debt_from_study = df[['debt']]
debt_from_study.index = df['Google End Date']
debt_from_study.index = debt_from_study.index.to_datetime()
debt_from_study.head()

Out[4]:

	debt
2004-01-10	0.210000
2004-01-17	0.210000
2004-01-24	0.210000
2004-01-31	0.213333
2004-02-07	0.200000

In [5]:

debt_us_as_of_20130528 = pd.read_csv('debt_US_google_trend.csv', index_col='date')
debt_us_as_of_20130528.index = debt_us_as_of_20130528.index.to_datetime()
debt_us_as_of_20130528.head()

Out[5]:

	debt
2004-01-10	24
2004-01-17	25
2004-01-24	24
2004-01-31	25
2004-02-07	23

In [6]:

debt_compare = pd.merge(debt_from_study, debt_us_as_of_20130528, left_index=True, right_index=True, how='outer')
debt_compare.head()

Out[6]:

	debt_x	debt_y
2004-01-10	0.210000	24
2004-01-17	0.210000	25
2004-01-24	0.210000	24
2004-01-31	0.213333	25
2004-02-07	0.200000	23

In [7]:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax2 = ax.twinx()

debt_compare.debt_x.plot(ax=ax, style='b-')
debt_compare.debt_y.plot(ax=ax2, style='r-', secondary_y=True)

Out[7]:

<matplotlib.axes.AxesSubplot at 0x344db10>

In [7]: