"""Build an HTML report of 2014 stock prices with Plotly charts.

The script:
  1. downloads AAPL quotes and plots them with a 10-point moving average,
  2. downloads quotes for six tickers and plots a 6x6 scatter matrix with a
     histogram and kernel density estimate on the diagonal,
  3. writes a report containing a ticker reference table and summary
     statistics to a local HTML file.
"""
import plotly as py
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import time as dt_tm
from datetime import date as dt_date

import plotly.plotly as py
import plotly.tools as plotly_tools
from plotly.graph_objs import *

import os
import tempfile
# Point matplotlib's config directory at a fresh temp dir. This has to happen
# before matplotlib is imported, so the import order below is deliberate.
os.environ['MPLCONFIGDIR'] = tempfile.mkdtemp()
from matplotlib.finance import quotes_historical_yahoo
import matplotlib.pyplot as plt

from scipy.stats import gaussian_kde

from IPython.display import HTML

py.sign_in("jackp", "XXXX")

# Defaults used when the AAPL download returns nothing.
x = []
y = []
ma = []


def moving_average(interval, window_size):
    """Return the moving average of `interval` over `window_size` points.

    The series is convolved with a uniform window in numpy's 'same' mode, so
    the result has as many points as the longer of the two inputs. Values near
    both ends are computed from a partially overlapping window.
    """
    window = np.ones(int(window_size)) / float(window_size)
    return np.convolve(interval, window, 'same')


# ---------------------------------------------------------------------------
# Section 1: AAPL price and moving average
# ---------------------------------------------------------------------------
date1 = dt_date(2014, 1, 1)
date2 = dt_date(2014, 12, 12)

quotes = quotes_historical_yahoo('AAPL', date1, date2)
if len(quotes) == 0:
    print("Couldn't connect to yahoo trading database")
else:
    dates = [q[0] for q in quotes]
    y = [q[1] for q in quotes]
    # Plotly timestamp format
    x = [datetime.fromordinal(int(d)).strftime('%Y-%m-%d') for d in dates]
    ma = moving_average(y, 10)

xy_data = Scatter(x=x, y=y, mode='markers', marker=Marker(size=4), name='AAPL')
# Clip the first and last points of the convolution.
mov_avg = Scatter(
    x=x[5:-4],
    y=ma[5:-4],
    line=Line(width=2, color='red', opacity=0.5),
    name='Moving average',
)
data = Data([xy_data, mov_avg])

py.iplot(data, filename='apple stock moving average')

first_plot_url = py.plot(data, filename='apple stock moving average',
                         auto_open=False,)
print(first_plot_url)

# ---------------------------------------------------------------------------
# Section 2: scatter matrix of six tickers
# ---------------------------------------------------------------------------
tickers = ['AAPL', 'GE', 'IBM', 'KO', 'MSFT', 'PEP']
prices = []
for ticker in tickers:
    quotes = quotes_historical_yahoo(ticker, date1, date2)
    prices.append([q[1] for q in quotes])

# One column per ticker, one row per quote.
df = pd.DataFrame(prices).transpose()
df.columns = tickers
df.head()

fig = plotly_tools.get_subplots(rows=6, columns=6, print_grid=True,
                                horizontal_spacing=0.05,
                                vertical_spacing=0.05)


def kde_scipy(x, x_grid, bandwidth=0.4, **kwargs):
    """Kernel Density Estimation with Scipy.

    Evaluates a Gaussian KDE fitted to `x` at the points in `x_grid`.
    `x` must provide `.std(ddof=1)` (e.g. a numpy array). Extra keyword
    arguments are forwarded to `scipy.stats.gaussian_kde`.
    """
    # From https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/
    # Note that scipy weights its bandwidth by the covariance of the
    # input data.  To make the results comparable to the other methods,
    # we divide the bandwidth by the sample standard deviation here.
    kde = gaussian_kde(x, bw_method=bandwidth / x.std(ddof=1), **kwargs)
    return kde.evaluate(x_grid)


data = []
subplot_number = 0  # running 1-based index of the current cell's axes
for x_ticker in df.columns:
    for y_ticker in df.columns:
        subplot_number += 1
        if x_ticker == y_ticker:
            # Diagonal cell: price distribution plus its density estimate.
            series = df[x_ticker]
            x_grid = np.linspace(series.min(), series.max(), 100)
            sp = [
                Histogram(x=series, histnorm='probability density'),
                Scatter(
                    x=x_grid,
                    y=kde_scipy(series.values, x_grid),
                    line=Line(width=2, color='red', opacity=0.5),
                ),
            ]
        else:
            sp = [
                Scatter(x=df[x_ticker], y=df[y_ticker], mode='markers',
                        marker=Marker(size=3)),
            ]

        # Every trace in a cell is bound to that cell's axis pair.
        for trace in sp:
            trace.update(
                name='{0} vs {1}'.format(x_ticker, y_ticker),
                xaxis='x{}'.format(subplot_number),
                yaxis='y{}'.format(subplot_number),
            )
        data += sp

# Add x and y labels: x titles go on axes 1..6, y titles on axes 1, 7, 13, ...
for position, tk in enumerate(tickers):
    fig['layout']['xaxis{}'.format(position + 1)].update(title=tk)
    fig['layout']['yaxis{}'.format(6 * position + 1)].update(title=tk)

# Remove legend by updating 'layout' key
fig['layout'].update(showlegend=False, height=1000, width=1000,
                     title='Major technology and CPG stock prices in 2014')
fig['data'] = Data(data)

py.iplot(fig, height=1000, width=1000,
         filename='Major technology and CPG stock prices in 2014'
                  ' - scatter matrix')

second_plot_url = py.plot(
    fig, height=1000, width=1000, auto_open=False,
    filename='Major technology and CPG stock prices in 2014 - scatter matrix')
print(second_plot_url)

# ---------------------------------------------------------------------------
# Section 3: HTML report
# ---------------------------------------------------------------------------
# Use bootstrap styling for the statistics table.
# NOTE(review): the original string-replace arguments were unreadable in the
# source; the bootstrap class names below follow the intent of the original
# "use bootstrap styling" comment - confirm they match the desired markup.
summary_table_1 = df.describe().to_html(classes='table table-striped')

summary_table_2 = '''
TickerFull name
AAPL Apple Inc
GE General Electric Company
IBM International Business Machines Corp.
KO The Coca-Cola Company
MSFT Microsoft Corporation
PEP Pepsico, Inc.
'''
HTML(summary_table_2)

# NOTE(review): the template below is plain text without markup and does not
# reference first_plot_url / second_plot_url - confirm nothing was stripped.
html_string = '''

2014 technology and CPG stock prices

Section 1: Apple Inc. (AAPL) stock in 2014

Apple stock price rose steadily through 2014.

Section 2: AAPL compared to other 2014 stocks

GE had the most predictable stock price in 2014. IBM had the highest mean stock price. \
The red lines are kernel density estimations of each stock price - the peak of each red lines \
corresponds to its mean stock price for 2014 on the x axis.

Reference table: stock tickers

''' + summary_table_2 + '''

Summary table: 2014 stock statistics

''' + summary_table_1 + ''' '''

# The context manager closes the file even if the write fails.
with open('/home/jack/report.html', 'w') as f:
    f.write(html_string)