"""Explore where the XL* tickers sit relative to SPY after z-scoring.

Downloads data for nine XL* tickers plus SPY through zipline's
``load_from_yahoo``, z-scores the first ``N_DAYS`` rows column by column,
flags where each ticker's z-score exceeds that of the last column, and
splits the first ticker's flags and the matching date strings into
``N_CHUNKS`` equal chunks.

The code originated as notebook cells: the bare expressions below
(``prices.shape``, ``X``, ...) are inspection lines that only display
something in an interactive session and are no-ops when run as a script.
"""

# Equivalent of the ``%matplotlib inline`` notebook magic.  The raw magic is
# a syntax error outside IPython, so it is issued through the shell object
# and skipped when no IPython shell exists.  Kept ahead of the imports, as in
# the original cell order.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns  # unused below; presumably imported for plot styling -- TODO confirm
from scipy import stats
from zipline.data import load_from_yahoo

N_DAYS = 400    # number of leading rows of `data` used in the analysis
N_CHUNKS = 20   # np.split needs N_DAYS to be an exact multiple of this

stocks = ['XLY', 'XLF', 'XLK', 'XLE', 'XLV', 'XLI', 'XLP', 'XLB', 'XLU', 'SPY']

end_date = datetime(2014, 5, 1)
# Ensure there is enough data (number of days: 800)
start_date = end_date - pd.DateOffset(days=800)

data = load_from_yahoo(stocks=stocks, start=start_date, end=end_date)
data.plot()
print(data.index[0])
print(data.index[-1])

# Plain ndarray of the first N_DAYS rows.
prices = data.values[:N_DAYS]
prices.shape

# Despite the name, this is the per-column z-score of `prices`
# (sample standard deviation, ddof=1), not a day-over-day change.
changes_all = stats.zscore(prices, axis=0, ddof=1)
changes_all.shape  # Quick look at the shape

# Subtract the last column from every other column via broadcasting, then
# encode "above the last column" as 1 and everything else as 0.
# NOTE(review): assumes the column order of `data` follows `stocks`, so that
# the last column is SPY -- confirm against load_from_yahoo.
relative = changes_all[:, :-1] - changes_all[:, -1:]
changes = (relative > 0).astype(int)
changes.shape

# First column only, cut into N_CHUNKS equal pieces.
X = np.split(changes[:, 0], N_CHUNKS)
X
changes[:, 0]

# Y is the final chunk; note that it is still contained in X as X[-1].
Y = X[-1]
Y

# 'YYYY-MM-DD' strings: the first ten characters of each index entry's str().
full_dates = data.index[:N_DAYS]
dates = np.array([str(date)[:10] for date in full_dates])
dates[:10]

dates_split = np.array(np.split(dates, N_CHUNKS))
dates_split.shape
dates_split[:2]

dates_labels = dates_split[-1]
dates_labels

# Pair the i-th chunk of dates with the i-th date of the final chunk.
for x_dates, label in zip(dates_split, dates_labels):
    print('Target: {}'.format(label))
    print('Training: {}'.format(x_dates))
    print('')