import pandas as pd
import numpy as np
Data used by researchers in the study:
!head -n 2 PreisMoatStanley_ScientificReports_3_1684_2013.dat
"Google Start Date" "Google End Date" "arts" "banking" "bonds" "bubble" "buy" "cancer" "car" "cash" "chance" "color" "conflict" "consume" "consumption" "crash" "credit" "crisis" "culture" "debt" "default" "derivatives" "dividend" "dow jones" "earnings" "economics" "economy" "energy" "environment" "fed" "finance" "financial markets" "fine" "fond" "food" "forex" "freedom" "fun" "gain" "gains" "garden" "gold" "greed" "growth" "happy" "headlines" "health" "hedge" "holiday" "home" "house" "housing" "inflation" "invest" "investment" "kitchen" "labor" "leverage" "lifestyle" "loss" "markets" "marriage" "metals" "money" "movie" "nasdaq" "nyse" "office" "oil" "opportunity" "ore" "politics" "portfolio" "present" "profit" "rare earths" "religion" "restaurant" "return" "returns" "revenue" "rich" "ring" "risk" "sell" "short sell" "short selling" "society" "stock market" "stocks" "success" "tourism" "trader" "train" "transaction" "travel" "unemployment" "war" "water" "world" "DJIA Date" "DJIA Closing Price" 2004-01-04 2004-01-10 0.956667 0.193333 0.203333 0.183333 2.41667 0.913333 4.19333 0.326667 0.0766667 1.12 0.12 0.00333333 0.0866667 0.31 2.19333 0.12 0.383333 0.21 0.23 0.02 0.04 0.0433333 0.05 0.183333 0.19 0.723333 0.206667 0.17 0.413333 0 0.383333 0.03 2.13333 0.02 0.29 0.606667 0.146667 0.04 0.85 0.936667 0.01 0.336667 0.426667 0.04 2.73333 0.0633333 0.886667 5.6 3.2 0.493333 0.0566667 0.0466667 0.363333 0.63 0.486667 0 0.0733333 0.503333 0.11 0.563333 0.08 1 4.85 0.09 0.06 2.74667 0.86 0.1 0.02 0.12 0.11 0.0966667 0.116667 0 0.293333 1.74667 0.6 0.07 0.293333 0.266667 0.93 0.236667 0.3 0 0 1.23667 0.126667 0.26 0.123333 0.343333 0.346667 0.706667 0.05 2.06 0.336667 2.02 1.91333 4.83333 2004-01-12 10485.18 cat: write error: Broken pipe
# Load the study's space-delimited data file and keep only the Google Trends
# week boundaries, the 'debt' search-volume column and the DJIA columns.
study_data_path = 'PreisMoatStanley_ScientificReports_3_1684_2013.dat'
columns = [
    'Google Start Date',
    'Google End Date',
    'debt',
    'DJIA Date',
    'DJIA Closing Price',
]
df = pd.read_csv(study_data_path, sep=' ').loc[:, columns]
df.head()
| | Google Start Date | Google End Date | debt | DJIA Date | DJIA Closing Price |
|---|---|---|---|---|---|
| 0 | 2004-01-04 | 2004-01-10 | 0.210000 | 2004-01-12 | 10485.18 |
| 1 | 2004-01-11 | 2004-01-17 | 0.210000 | 2004-01-20 | 10528.66 |
| 2 | 2004-01-18 | 2004-01-24 | 0.210000 | 2004-01-26 | 10702.51 |
| 3 | 2004-01-25 | 2004-01-31 | 0.213333 | 2004-02-02 | 10499.18 |
| 4 | 2004-02-01 | 2004-02-07 | 0.200000 | 2004-02-09 | 10579.03 |
# Build a one-column frame of the study's 'debt' values, indexed by the end
# date of each Google Trends week so it can be aligned with other series.
debt_from_study = df[['debt']]
# Index.to_datetime() is not available in current pandas; pd.to_datetime is
# the supported way to turn the date strings into datetimes.
debt_from_study.index = pd.to_datetime(df['Google End Date'])
debt_from_study.head()
| | debt |
|---|---|
| 2004-01-10 | 0.210000 |
| 2004-01-17 | 0.210000 |
| 2004-01-24 | 0.210000 |
| 2004-01-31 | 0.213333 |
| 2004-02-07 | 0.200000 |
# Load the separately saved Google Trends series for 'debt', using its
# 'date' column as the index.
debt_us_as_of_20130528 = pd.read_csv('debt_US_google_trend.csv', index_col='date')
# Index.to_datetime() is not available in current pandas; convert the index
# with pd.to_datetime so it matches the datetime index of the study data.
debt_us_as_of_20130528.index = pd.to_datetime(debt_us_as_of_20130528.index)
debt_us_as_of_20130528.head()
| | debt |
|---|---|
| 2004-01-10 | 24 |
| 2004-01-17 | 25 |
| 2004-01-24 | 24 |
| 2004-01-31 | 25 |
| 2004-02-07 | 23 |
# Align the two 'debt' series on their date indexes. The outer join keeps
# dates present in either frame; because both frames have a 'debt' column,
# pandas' default suffixes name them debt_x (study) and debt_y (saved trend).
debt_compare = debt_from_study.merge(
    debt_us_as_of_20130528,
    how='outer',
    left_index=True,
    right_index=True,
)
debt_compare.head()
| | debt_x | debt_y |
|---|---|---|
| 2004-01-10 | 0.210000 | 24 |
| 2004-01-17 | 0.210000 | 25 |
| 2004-01-24 | 0.210000 | 24 |
| 2004-01-31 | 0.213333 | 25 |
| 2004-02-07 | 0.200000 | 23 |
import matplotlib.pyplot as plt
# Plot both 'debt' series against the shared date axis, each on its own
# y-axis because their scales differ (fractions vs. integer values).
fig, ax = plt.subplots()
# ax2 shares the x-axis with ax and draws its y-axis on the right.
ax2 = ax.twinx()
debt_compare.debt_x.plot(ax=ax, style='b-')
# ax2 is already the right-hand twin, so secondary_y=True is not passed:
# it would make pandas create yet another twin axis on top of ax2.
debt_compare.debt_y.plot(ax=ax2, style='r-')
<matplotlib.axes.AxesSubplot at 0x344db10>