In [1]:
import pandas as pd
import numpy as np

Data used by researchers in the study:

In [2]:
!cat PreisMoatStanley_ScientificReports_3_1684_2013.dat | head -n 2
"Google Start Date" "Google End Date" "arts" "banking" "bonds" "bubble" "buy" "cancer" "car" "cash" "chance" "color" "conflict" "consume" "consumption" "crash" "credit" "crisis" "culture" "debt" "default" "derivatives" "dividend" "dow jones" "earnings" "economics" "economy" "energy" "environment" "fed" "finance" "financial markets" "fine" "fond" "food" "forex" "freedom" "fun" "gain" "gains" "garden" "gold" "greed" "growth" "happy" "headlines" "health" "hedge" "holiday" "home" "house" "housing" "inflation" "invest" "investment" "kitchen" "labor" "leverage" "lifestyle" "loss" "markets" "marriage" "metals" "money" "movie" "nasdaq" "nyse" "office" "oil" "opportunity" "ore" "politics" "portfolio" "present" "profit" "rare earths" "religion" "restaurant" "return" "returns" "revenue" "rich" "ring" "risk" "sell" "short sell" "short selling" "society" "stock market" "stocks" "success" "tourism" "trader" "train" "transaction" "travel" "unemployment" "war" "water" "world" "DJIA Date" "DJIA Closing Price"
2004-01-04 2004-01-10 0.956667 0.193333 0.203333 0.183333 2.41667 0.913333 4.19333 0.326667 0.0766667 1.12 0.12 0.00333333 0.0866667 0.31 2.19333 0.12 0.383333 0.21 0.23 0.02 0.04 0.0433333 0.05 0.183333 0.19 0.723333 0.206667 0.17 0.413333 0 0.383333 0.03 2.13333 0.02 0.29 0.606667 0.146667 0.04 0.85 0.936667 0.01 0.336667 0.426667 0.04 2.73333 0.0633333 0.886667 5.6 3.2 0.493333 0.0566667 0.0466667 0.363333 0.63 0.486667 0 0.0733333 0.503333 0.11 0.563333 0.08 1 4.85 0.09 0.06 2.74667 0.86 0.1 0.02 0.12 0.11 0.0966667 0.116667 0 0.293333 1.74667 0.6 0.07 0.293333 0.266667 0.93 0.236667 0.3 0 0 1.23667 0.126667 0.26 0.123333 0.343333 0.346667 0.706667 0.05 2.06 0.336667 2.02 1.91333 4.83333 2004-01-12 10485.18
cat: write error: Broken pipe
In [3]:
df = pd.read_csv('PreisMoatStanley_ScientificReports_3_1684_2013.dat', delimiter=' ')
columns = ['Google Start Date', 'Google End Date', 'debt', 'DJIA Date', 'DJIA Closing Price']
df = df[columns]
df.head()
Out[3]:
Google Start Date Google End Date debt DJIA Date DJIA Closing Price
0 2004-01-04 2004-01-10 0.210000 2004-01-12 10485.18
1 2004-01-11 2004-01-17 0.210000 2004-01-20 10528.66
2 2004-01-18 2004-01-24 0.210000 2004-01-26 10702.51
3 2004-01-25 2004-01-31 0.213333 2004-02-02 10499.18
4 2004-02-01 2004-02-07 0.200000 2004-02-09 10579.03
In [4]:
debt_from_study = df[['debt']]
debt_from_study.index = df['Google End Date']
debt_from_study.index = debt_from_study.index.to_datetime()
debt_from_study.head()
Out[4]:
debt
2004-01-10 0.210000
2004-01-17 0.210000
2004-01-24 0.210000
2004-01-31 0.213333
2004-02-07 0.200000
In [5]:
debt_us_as_of_20130528 = pd.read_csv('debt_US_google_trend.csv', index_col='date')
debt_us_as_of_20130528.index = debt_us_as_of_20130528.index.to_datetime()
debt_us_as_of_20130528.head()
Out[5]:
debt
2004-01-10 24
2004-01-17 25
2004-01-24 24
2004-01-31 25
2004-02-07 23
In [6]:
debt_compare = pd.merge(debt_from_study, debt_us_as_of_20130528, left_index=True, right_index=True, how='outer')
debt_compare.head()
Out[6]:
debt_x debt_y
2004-01-10 0.210000 24
2004-01-17 0.210000 25
2004-01-24 0.210000 24
2004-01-31 0.213333 25
2004-02-07 0.200000 23
In [7]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax2 = ax.twinx()

debt_compare.debt_x.plot(ax=ax, style='b-')
debt_compare.debt_y.plot(ax=ax2, style='r-', secondary_y=True)
Out[7]:
<matplotlib.axes.AxesSubplot at 0x344db10>
In [7]: