%load_ext watermark
%watermark
from copy import deepcopy
import pickle
import os
import pandas as pd
import pandas_datareader.data as web
from pandas.tseries.offsets import Week
import numpy as np
import sklearn.mixture as mix
from sklearn.externals import joblib
import scipy.stats as scs
from math import ceil
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(font_scale=1.25)
style_kwds = {'xtick.major.size': 3, 'ytick.major.size': 3,
'font.family':'Courier Prime Code', 'legend.frameon': True} #
sns.set_style('white', style_kwds)
import missingno as msno
from tqdm import tqdm
import affirm
import warnings
warnings.filterwarnings("ignore")
import re
p=print
p()
%watermark -p pandas,pandas_datareader,numpy,scipy,sklearn,matplotlib,seaborn
2017-04-25T17:31:23-06:00 CPython 3.6.0 IPython 5.1.0 compiler : GCC 4.4.7 20120313 (Red Hat 4.4.7-1) system : Linux release : 4.4.0-72-generic machine : x86_64 processor : x86_64 CPU cores : 8 interpreter: 64bit pandas 0.19.2 pandas_datareader 0.3.0.post numpy 1.12.1 scipy 0.19.0 sklearn 0.18.1 matplotlib 2.0.0 seaborn 0.7.1
df = pd.read_hdf('GMM_Results_TidyData.h5', 'table')
# can view individual results
df.query('sym == "SPY"').groupby(['k', 'steps','lookback']).mean().T
k | 2 | ... | 21 | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
steps | 1 | 2 | 3 | 5 | ... | 5 | 7 | 10 | 21 | ||||||||||||
lookback | 1 | 3 | 999 | 1 | 3 | 999 | 1 | 3 | 999 | 1 | ... | 999 | 1 | 3 | 999 | 1 | 3 | 999 | 1 | 3 | 999 |
accuracy | 0.72406 | 0.751246 | 0.628002 | 0.72406 | 0.751246 | 0.628002 | 0.72406 | 0.751246 | 0.628002 | 0.72406 | ... | 0.681921 | 0.435433 | 0.599909 | 0.681921 | 0.435433 | 0.599909 | 0.681921 | 0.435433 | 0.599909 | 0.681921 |
mean | 0.00130 | 0.001100 | 0.000900 | 0.00060 | 0.001500 | 0.000500 | 0.00120 | 0.001400 | 0.000200 | 0.00330 | ... | 0.001900 | 0.001600 | 0.004400 | 0.001900 | 0.002100 | 0.004500 | 0.001900 | 0.003300 | 0.001700 | 0.001000 |
median | 0.00130 | 0.002000 | 0.001300 | 0.00070 | 0.002100 | 0.000800 | 0.00310 | 0.003700 | 0.002800 | 0.00640 | ... | 0.005000 | 0.004100 | 0.005200 | 0.004800 | 0.006000 | 0.008000 | 0.007200 | 0.011400 | 0.009500 | 0.013300 |
max_ | 0.04550 | 0.052500 | 0.060500 | 0.06820 | 0.119500 | 0.119500 | 0.09320 | 0.126900 | 0.126900 | 0.07180 | ... | 0.107800 | 0.122400 | 0.142900 | 0.160000 | 0.153100 | 0.153100 | 0.195400 | 0.173800 | 0.151200 | 0.188400 |
min_ | -0.04580 | -0.077100 | -0.077100 | -0.07360 | -0.097900 | -0.097900 | -0.14370 | -0.143700 | -0.143700 | -0.18320 | ... | -0.209600 | -0.189100 | -0.249100 | -0.271100 | -0.311500 | -0.317900 | -0.348800 | -0.427000 | -0.427000 | -0.418000 |
gt0_cnt | 118.00000 | 110.000000 | 178.000000 | 108.00000 | 109.000000 | 168.000000 | 122.00000 | 118.000000 | 179.000000 | 130.00000 | ... | 154.000000 | 338.000000 | 212.000000 | 144.000000 | 345.000000 | 213.000000 | 152.000000 | 354.000000 | 211.000000 | 155.000000 |
lt0_cnt | 89.00000 | 87.000000 | 140.000000 | 99.00000 | 88.000000 | 150.000000 | 85.00000 | 79.000000 | 139.000000 | 77.00000 | ... | 104.000000 | 237.000000 | 144.000000 | 114.000000 | 230.000000 | 143.000000 | 106.000000 | 221.000000 | 145.000000 | 103.000000 |
sum_gt0 | 1.01060 | 1.185000 | 1.761400 | 1.35450 | 1.732300 | 2.472000 | 1.80610 | 2.072300 | 2.998200 | 2.31640 | ... | 2.959200 | 6.005200 | 4.607100 | 3.430500 | 7.510500 | 5.393200 | 4.055300 | 11.036800 | 7.086000 | 5.900800 |
sum_lt0 | -0.73200 | -0.975700 | -1.475400 | -1.22740 | -1.439600 | -2.297600 | -1.55860 | -1.795600 | -2.945000 | -1.62370 | ... | -2.476800 | -5.058400 | -3.029000 | -2.944600 | -6.281200 | -3.800000 | -3.572800 | -9.167500 | -6.470100 | -5.639900 |
sum_ratio | 1.38060 | 1.214600 | 1.193900 | 1.10350 | 1.203300 | 1.075900 | 1.15880 | 1.154100 | 1.018100 | 1.42660 | ... | 1.194800 | 1.187200 | 1.521000 | 1.165000 | 1.195700 | 1.419300 | 1.135100 | 1.203900 | 1.095200 | 1.046200 |
gt_pct | 0.57000 | 0.558400 | 0.559700 | 0.52170 | 0.553300 | 0.528300 | 0.58940 | 0.599000 | 0.562900 | 0.62800 | ... | 0.596900 | 0.587800 | 0.595500 | 0.558100 | 0.600000 | 0.598300 | 0.589100 | 0.615700 | 0.592700 | 0.600800 |
lt_pct | 0.43000 | 0.441600 | 0.440300 | 0.47830 | 0.446700 | 0.471700 | 0.41060 | 0.401000 | 0.437100 | 0.37200 | ... | 0.403100 | 0.412200 | 0.404500 | 0.441900 | 0.400000 | 0.401700 | 0.410900 | 0.384300 | 0.407300 | 0.399200 |
12 rows × 168 columns
df.query('sym == "SPY"').groupby(['k', 'steps','lookback'])['sum_ratio'].mean().unstack()
lookback | 1 | 3 | 999 | |
---|---|---|---|---|
k | steps | |||
2 | 1 | 1.3806 | 1.2146 | 1.1939 |
2 | 1.1035 | 1.2033 | 1.0759 | |
3 | 1.1588 | 1.1541 | 1.0181 | |
5 | 1.4266 | 1.2988 | 1.2355 | |
7 | 1.3061 | 1.2181 | 1.1933 | |
10 | 1.2528 | 1.1140 | 1.1013 | |
21 | 1.1795 | 0.9939 | 1.0623 | |
3 | 1 | 1.5326 | 1.3799 | 1.1629 |
2 | 1.2588 | 1.0730 | 0.9559 | |
3 | 1.2554 | 1.0362 | 0.9248 | |
5 | 1.4943 | 1.1174 | 1.0305 | |
7 | 1.3562 | 1.1055 | 1.0608 | |
10 | 1.3104 | 1.0478 | 0.9581 | |
21 | 1.1694 | 1.0987 | 1.0401 | |
5 | 1 | 1.2299 | 1.3337 | 1.1743 |
2 | 1.1551 | 1.0618 | 0.9798 | |
3 | 1.1248 | 1.1060 | 0.9332 | |
5 | 1.4150 | 1.4754 | 1.1586 | |
7 | 1.3135 | 1.4061 | 1.2162 | |
10 | 1.1943 | 1.3099 | 1.0387 | |
21 | 1.0959 | 1.1919 | 0.8289 | |
7 | 1 | 1.0653 | 1.5026 | 1.2731 |
2 | 1.0596 | 1.0715 | 1.0788 | |
3 | 1.1546 | 1.0525 | 1.1063 | |
5 | 1.4114 | 1.4787 | 1.3386 | |
7 | 1.2811 | 1.4847 | 1.2621 | |
10 | 1.2368 | 1.3933 | 1.2419 | |
21 | 1.3120 | 1.1530 | 1.0946 | |
9 | 1 | 1.0479 | 1.3456 | 1.3508 |
2 | 1.0196 | 1.0609 | 1.1059 | |
3 | 1.1100 | 1.0403 | 1.1713 | |
5 | 1.3666 | 1.2846 | 1.4543 | |
7 | 1.3124 | 1.1804 | 1.4201 | |
10 | 1.2260 | 1.1591 | 1.2543 | |
21 | 1.2077 | 1.0994 | 1.2401 | |
13 | 1 | 1.1263 | 1.1889 | 1.4211 |
2 | 1.0648 | 1.0269 | 1.1443 | |
3 | 1.1574 | 1.0529 | 1.2717 | |
5 | 1.3378 | 1.2382 | 1.3161 | |
7 | 1.2053 | 1.1437 | 1.2919 | |
10 | 1.1588 | 1.1268 | 1.1386 | |
21 | 1.3624 | 1.0367 | 0.9793 | |
17 | 1 | 1.2072 | 1.2717 | 1.1266 |
2 | 1.2261 | 1.1880 | 0.8662 | |
3 | 1.1569 | 1.2243 | 0.9143 | |
5 | 1.2874 | 1.2782 | 1.1725 | |
7 | 1.3338 | 1.0865 | 1.0402 | |
10 | 1.3186 | 1.1162 | 0.9493 | |
21 | 1.3412 | 1.0565 | 0.7675 | |
21 | 1 | 1.1376 | 1.4016 | 1.3197 |
2 | 1.1715 | 1.3162 | 1.0202 | |
3 | 1.1335 | 1.3548 | 1.0144 | |
5 | 1.2000 | 1.5998 | 1.1948 | |
7 | 1.1872 | 1.5210 | 1.1650 | |
10 | 1.1957 | 1.4193 | 1.1351 | |
21 | 1.2039 | 1.0952 | 1.0462 |
ser = df.groupby(['sym', 'lookback'])['median'].mean().unstack().mean(axis=1)
ser
sym EEM 0.000977 EFA 0.002130 GLD 0.000771 QQQ 0.005810 SPY 0.004829 TLT 0.002437 dtype: float64
# median returns by K
df1 = df.groupby(['sym', 'k'])['median'].mean().unstack()#.mean(axis=1)
df1
k | 2 | 3 | 5 | 7 | 9 | 13 | 17 | 21 |
---|---|---|---|---|---|---|---|---|
sym | ||||||||
EEM | 0.004195 | 0.000376 | 0.001681 | 0.001310 | -0.000805 | 0.000771 | 0.000086 | 0.000205 |
EFA | 0.003414 | 0.000990 | 0.002214 | 0.001810 | 0.002000 | 0.001990 | 0.001495 | 0.003129 |
GLD | 0.000843 | 0.000952 | 0.000886 | -0.000067 | 0.000867 | 0.001314 | 0.000581 | 0.000795 |
QQQ | 0.006552 | 0.006690 | 0.005933 | 0.006000 | 0.005852 | 0.004971 | 0.005243 | 0.005233 |
SPY | 0.006505 | 0.005195 | 0.004957 | 0.004757 | 0.004824 | 0.004262 | 0.003500 | 0.004629 |
TLT | 0.003152 | 0.002310 | 0.001986 | 0.002248 | 0.002724 | 0.002233 | 0.002805 | 0.002038 |
# median returns by step
df1 = df.groupby(['sym', 'steps'])['median'].mean().unstack()#.mean(axis=1)
df1
steps | 1 | 2 | 3 | 5 | 7 | 10 | 21 |
---|---|---|---|---|---|---|---|
sym | |||||||
EEM | 0.001596 | 0.000096 | -0.000592 | 0.002512 | 0.002221 | 0.003883 | -0.002875 |
EFA | 0.000558 | 0.000412 | -0.000350 | 0.001842 | 0.005412 | 0.005479 | 0.001558 |
GLD | 0.001063 | 0.000758 | 0.001408 | 0.000546 | -0.001042 | -0.000538 | 0.003204 |
QQQ | 0.000217 | 0.000937 | 0.003329 | 0.006446 | 0.005383 | 0.009713 | 0.014642 |
SPY | 0.001033 | 0.000675 | 0.002500 | 0.005063 | 0.004925 | 0.007425 | 0.012179 |
TLT | 0.001521 | 0.000921 | 0.000821 | 0.002388 | 0.003367 | 0.002712 | 0.005329 |
# sum ratio by step
df2 = df.groupby(['sym', 'steps'])['sum_ratio'].mean().unstack()#.mean(axis=1)
df2
steps | 1 | 2 | 3 | 5 | 7 | 10 | 21 |
---|---|---|---|---|---|---|---|
sym | |||||||
EEM | 1.370908 | 1.069050 | 1.142558 | 1.218625 | 1.076463 | 0.993300 | 0.658767 |
EFA | 1.261367 | 0.930817 | 0.925383 | 1.030763 | 1.040233 | 0.966746 | 0.670408 |
GLD | 1.090254 | 1.015225 | 1.031525 | 0.989417 | 1.011271 | 1.036229 | 1.160317 |
QQQ | 1.141629 | 1.113429 | 1.191242 | 1.367733 | 1.317471 | 1.274929 | 1.155454 |
SPY | 1.266183 | 1.095317 | 1.109442 | 1.317129 | 1.253800 | 1.183212 | 1.110679 |
TLT | 1.255213 | 1.123088 | 1.160733 | 1.350704 | 1.349554 | 1.461875 | 1.469208 |
# fun with facet grids
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(plt.hist, "sum_ratio", alpha=0.5);
g.add_legend();
for ax in g.axes.flat:
ax.axvline(1, color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-steps-sum-ratio-hist.png', dpi=300, bbox_inches='tight')
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(plt.hist, "median", alpha=0.5);
g.add_legend();
for ax in g.axes.flat:
ax.axvline(0, color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-steps-median-return-hist.png', dpi=300, bbox_inches='tight')
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(sns.barplot, "steps", "median", ci=None, alpha=0.5)
g.add_legend();
for ax in g.axes.flat:
ax.axhline(0, color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-steps-median-return-barplot.png', dpi=300, bbox_inches='tight')
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(sns.barplot, "steps", "sum_ratio", ci=None, alpha=0.5)
g.add_legend();
for ax in g.axes.flat:
ax.axhline(1., color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-steps-sum-ratio-barplot.png', dpi=300, bbox_inches='tight')
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(sns.barplot, "k", "median", ci=None, alpha=0.5)
g.add_legend();
for ax in g.axes.flat:
ax.axhline(0, color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-k-median-return-barplot.png', dpi=300, bbox_inches='tight')
g = sns.FacetGrid(df, col="sym", hue="lookback", col_wrap=3)
g.map(sns.barplot, "k", "sum_ratio", ci=None, alpha=0.5)
g.add_legend();
for ax in g.axes.flat:
ax.axhline(1., color='k', lw=1, ls='--')
g.fig.savefig('facetgrid-lookback-k-sum-ratio-barplot.png', dpi=300, bbox_inches='tight')