Sources of Open Data

In [1]:
import addutils.toc ; addutils.toc.js(ipy_notebook=True)
Out[1]:
In [2]:
import datetime
import scipy.io
import numpy as np
import pandas as pd

import pandas_datareader.data as web
import bokeh.plotting as bk
from IPython.display import display
# Route Bokeh output to the notebook so that bk.show() renders figures inline.
bk.output_notebook()
Loading BokehJS ...
In [3]:
# NOTE(review): presumably applies the addutils notebook stylesheet - confirm against addutils.
from addutils import css_notebook
css_notebook()
Out[3]:

1 Yahoo! Finance

In [4]:
# Date window shared by both Yahoo! Finance requests.
start = datetime.datetime(year=2000, month=1, day=1)
end = datetime.datetime(year=2014, month=5, day=14)

# One price-history frame per ticker, fetched over the same window.
AAPL_yahoo, IBM_yahoo = (
    web.DataReader(ticker, 'yahoo', start, end) for ticker in ("AAPL", "IBM")
)

1.1 Plotting timeseries with bokeh:

In [5]:
# Adjusted closing prices of both tickers drawn on one datetime axis.
fig = bk.figure(
    title='Closing Prices  - From Yahoo! Finance',
    x_axis_type="datetime",
    tools="pan,box_zoom,reset",
    plot_width=750,
    plot_height=400,
)
for prices, colour, label in (
    (AAPL_yahoo, 'darkred', 'Apple'),
    (IBM_yahoo, 'royalblue', 'IBM'),
):
    fig.line(prices.index, prices['Adj Close'],
             line_width=2, color=colour, legend=label)
fig.legend.location = "top_left"
bk.show(fig)
In [6]:
# Inputs for the candlestick chart, taken from the last 100 rows of the Apple data.
df = AAPL_yahoo.iloc[-100:]
mids = df['High'].add(df['Low']).div(2)   # midpoint of each row's High-Low range
spans = df['Close'].sub(df['Open'])       # signed Close - Open difference
inc = df['Close'].ge(df['Open'])          # mask: Close at or above Open
dec = df['Close'].lt(df['Open'])          # mask: Close below Open
w = 18 * 60 * 60 * 1000                   # 18 hours (3/4 of a day) in ms

1.2 Plotting candlesticks with bokeh:

In [7]:
# Candlestick chart: a High-Low segment per row plus a coloured Open-Close box.
fig = bk.figure(x_axis_type="datetime",
                tools="pan,box_zoom,reset",
                title='Candlesticks  - From Yahoo! Finance',
                plot_width=750, plot_height=400)
fig.segment(df.index, df['High'], df.index, df['Low'], color='black')
# Both box series carry a legend label: without any labelled glyph the figure
# has no legend, and the legend placement below would have nothing to act on.
fig.rect(df[inc].index, mids[inc], w, spans[inc],
         fill_color="lawngreen", line_color="black", legend='Close >= Open')
fig.rect(df[dec].index, mids[dec], w, spans[dec],
         fill_color="crimson", line_color="black", legend='Close < Open')
fig.legend.location = "top_left"
bk.show(fig)

1.3 Plotting data ranges with bokeh:

In [8]:
# Outline of the shaded band: Low values in index order followed by High
# values in reverse order, so the patch encloses the whole High-Low range.
x = df.index.append(df.index[::-1])
# pd.concat replaces Series.append, which is deprecated and was removed in pandas 2.0.
y = pd.concat([df['Low'], df['High'].iloc[::-1]])

fig = bk.figure(x_axis_type="datetime",
                tools="pan,box_zoom,reset",
                title="Mid Trading Values with Range  - From Yahoo! Finance",
                plot_width=750, plot_height=400)
fig.patch(x, y, color='darkred', alpha=0.3, line_color='violet', line_alpha=0.6)
# Midpoint of High and Low drawn on top of the band.
fig.line(df.index, mids, line_width=2, color='darkred', legend='Apple')
bk.show(fig)

1.4 Plotting multiple plots with matplotlib:

In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
In [10]:
import matplotlib.gridspec as gridspec

# Two stacked panels, each spanning both grid columns; the upper panel gets
# twice the height of the lower one. The stray Bokeh figure that used to
# overwrite `fig` here has been removed: this cell is matplotlib-only.
fig = plt.figure(figsize=(15, 10))
gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], hspace=0.4)
ax0 = plt.subplot(gs[0, :])
ax1 = plt.subplot(gs[1, :])

# Upper panel: High and Low for the last 200 rows. The columns are selected by
# name rather than by position so the plot does not depend on column order.
AAPL_yahoo[['High', 'Low']].iloc[-200:].plot(ax=ax0)

# Lower panel: High against its 20-row rolling mean (stored as column 'r_mean').
AAPL_yahoo['r_mean'] = AAPL_yahoo['High'].rolling(window=20, center=False).mean()
AAPL_yahoo[['High', 'r_mean']].iloc[-200:].plot(ax=ax1)
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc098ad0b00>

1.5 Plotting multiple plots with Bokeh:

In [11]:
# Upper figure: High and Low for the last 200 rows, plotted against row number.
ax0 = bk.figure(plot_width=700, plot_height=300, title=None)
# Select the two columns by name: the lines below index data0 by 'High' and
# 'Low', which a positional column slice only provides for one column order.
data0 = AAPL_yahoo[['High', 'Low']].iloc[-200:]
ax0.line(x=np.arange(len(data0)), y=data0['High'],
         legend='High', line_color='blue')
ax0.line(x=np.arange(len(data0)), y=data0['Low'],
         legend='Low', line_color='green')

# Lower figure: High against its 20-row rolling mean.
ax1 = bk.figure(plot_width=700, plot_height=300, title=None)
AAPL_yahoo['r_mean'] = AAPL_yahoo['High'].rolling(window=20, center=False).mean()
data1 = AAPL_yahoo.iloc[-200:]
ax1.line(x=np.arange(len(data1)), y=data1['High'],
         legend='High', line_color='blue')
ax1.line(x=np.arange(len(data1)), y=data1['r_mean'],
         legend='r_mean', line_color='green')

# Stack the two figures vertically, one per grid row.
fig = bk.gridplot([[ax0], [ax1]])
bk.show(fig)

2 Quandl

In [12]:
# Both frames are requested from the 'quandl' source; the `_google` suffix in
# the variable names is kept because the plotting cell refers to them.
AAPL_google, IBM_google = (
    web.DataReader(ticker, 'quandl') for ticker in ("AAPL", "IBM")
)
In [13]:
# Closing prices of both tickers on one datetime axis. The frames are loaded
# from the 'quandl' source, so the title names Quandl as the provider.
fig = bk.figure(x_axis_type="datetime",
                tools="pan,box_zoom,reset", title='Closing Prices - From Quandl',
                plot_width=750, plot_height=400)
fig.line(AAPL_google.index, AAPL_google['Close'],
         line_width=2, color='darkred', legend='Apple')
fig.line(IBM_google.index, IBM_google['Close'],
         line_width=2, color='royalblue', legend='IBM')
fig.legend.location = "top_left"
bk.show(fig)

3 Federal Reserve Economic Data

Federal Reserve Economic Data (FRED) is an online database consisting of more than 148,000 economic data time series from 59 national, international, public, and private sources.

In [14]:
# Date window for the FRED request.
start = datetime.datetime(year=2005, month=1, day=1)
end = datetime.datetime(year=2014, month=2, day=1)

# FRED series ids, one per country.
fred_series = [
    'LMUNRLTTATM647N',  # Registered Unemployment Level for Austria
    'LMUNRLTTDKM647N',  # Registered Unemployment Level for Denmark
    'LMUNRLTTDEM647N',  # Registered Unemployment Level for Germany
    'LMUNRLTTIEM647N',  # Registered Unemployment Level for Ireland
]
unemployment = web.DataReader(fred_series, 'fred', start, end)
In [15]:
from bokeh.models.formatters import NumeralTickFormatter

# One line per country, all sharing the datetime index of the FRED frame.
fig = bk.figure(
    title='Unemployment - Europe',
    x_axis_type="datetime",
    tools="pan,box_zoom,reset",
    plot_width=750,
    plot_height=300,
)
for series_id, colour, country in (
    ('LMUNRLTTATM647N', '#800080', 'Austria'),
    ('LMUNRLTTDKM647N', '#008B8B', 'Denmark'),
    ('LMUNRLTTDEM647N', '#FF0000', 'Germany'),
    ('LMUNRLTTIEM647N', '#2E8B57', 'Ireland'),
):
    fig.line(unemployment.index, unemployment[series_id],
             color=colour, line_width=2, legend=country)

# Axis labels, tick-label size, legend position and thousands separators.
fig.xaxis.major_label_text_font_size = "12pt"
fig.yaxis.axis_label = "Registered Unemployment Level"
fig.xaxis.axis_label = "Year"
fig.legend.location = "top_right"
fig.yaxis[0].formatter = NumeralTickFormatter(format='0,0')
bk.show(fig)

4 World Bank

The World Bank’s World Development Indicators is a huge database with thousands of panel data series. With pandas-datareader it's possible to use the search function in pandas_datareader.wb to find your data:

In [16]:
from pandas_datareader import wb

If you wanted to compare the Gross Domestic Product per capita in constant dollars across countries, you could first search for the matching indicators:

In [17]:
# Look up World Bank indicators matching the pattern; the result table lists each hit's id and name.
wb.search('gdp.*capita.*const')
Out[17]:
id name source sourceNote sourceOrganization topics
646 6.0.GDPpc_constant GDP per capita, PPP (constant 2011 internation... LAC Equity Lab GDP per capita based on purchasing power parit... b'World Development Indicators (World Bank)' Economy & Growth
8129 NY.GDP.PCAP.KD GDP per capita (constant 2010 US$) World Development Indicators GDP per capita is gross domestic product divid... b'World Bank national accounts data, and OECD ... Economy & Growth
8131 NY.GDP.PCAP.KN GDP per capita (constant LCU) World Development Indicators GDP per capita is gross domestic product divid... b'World Bank national accounts data, and OECD ... Economy & Growth
8133 NY.GDP.PCAP.PP.KD GDP per capita, PPP (constant 2011 internation... World Development Indicators GDP per capita based on purchasing power parit... b'World Bank, International Comparison Program... Economy & Growth
8134 NY.GDP.PCAP.PP.KD.87 GDP per capita, PPP (constant 1987 internation... WDI Database Archives b''
In [18]:
# Download NY.GDP.PCAP.KD (GDP per capita, constant 2010 US$) for every available country, 2005 through 2012.
dat = wb.download(indicator='NY.GDP.PCAP.KD', country=['all'], start=2005, end=2012)
In [19]:
# Preview the first 10 rows; the frame is indexed by (country, year).
dat.head(10)
Out[19]:
NY.GDP.PCAP.KD
country year
Arab World 2012 6154.555744
2011 5986.753035
2010 5917.512227
2009 5783.469395
2008 5899.709921
2007 5711.663247
2006 5594.898519
2005 5378.379169
Caribbean small states 2012 8822.045233
2011 8766.849448
In [20]:
# Mean of the indicator per country (index level 0) over the downloaded
# years, sorted from highest to lowest, with countries lacking data dropped.
# Written as a chain so the cell can be re-run without mutating `grouped` in place.
grouped = (
    dat['NY.GDP.PCAP.KD']
    .groupby(level=0)
    .mean()
    .sort_values(ascending=False)
    .dropna()
)

# Keep the `sample` highest and the `sample` lowest entries. pd.concat replaces
# Series.append, which is deprecated and was removed in pandas 2.0.
sample = 3
rich_poor = pd.concat([grouped.iloc[:sample], grouped.iloc[-sample:]])
rich_poor.values
Out[20]:
array([144246.36877497, 141165.08285671, 105180.52456616,    310.23214478,
          307.9347033 ,    228.65002423])
In [21]:
# Bar-style chart on a log axis: one thick vertical segment per entry,
# rising from the bottom of the y range to that entry's value.
index = rich_poor.index.tolist()
y_floor = 0.01
fig = bk.figure(
    title='Gross Domestic Products per capita - Rich vs Poor',
    plot_width=750,
    plot_height=500,
    x_range=index,
    y_axis_type='log',
    y_range=(y_floor, 10**6),
)
fig.yaxis.minor_tick_out = 0
fig.xaxis.major_tick_out = 0
fig.segment(x0=index, y0=y_floor, x1=index, y1=rich_poor.values,
            line_width=35, color='#4C72B0')
bk.show(fig)

Visit www.add-for.com for more tutorials and updates.

This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.