#!/usr/bin/env python # coding: utf-8 #

1 Chapter Goals
2 Chapter Outline
3 Algo Evaluation Motivation
- 3.1 Algorithm Uncertainty (AU)
- 3.2 Model Uncertainty (MU)
4 Read in backtest portfolio equity data.
5 Choose the Best Algorithm Among 4 Variants
6 Choose Best Bayesian Model of Algorithm Returns
- 6.1 Model Sampling and Comparison
7 Comparing Bayesian Cones for all Algos and all Return Models
- 7.1 Some observations:
8 Compare Best Algo's Predicted Portfolio Ending Values
9 Compare Best Algo Predicted CAGR Distributions
10 Model Averaging
11 Conclusions

# ## Chapter Goals # # * Demonstrate how to extract algorithm portfolio equity from [Quantconnect](https://www.quantconnect.com/) backtest # * Demonstrate how to predict future return paths using [bayesian cones](https://blog.quantopian.com/bayesian-cone/). # * Demonstrate how to estimate distribution of algorithm CAGRs. # * Demonstrate how to use model averaging to aid predictions. # ## Chapter Outline # # 1. Read in Algorithm Portfolio Equity # 2. Choose the Best Algorithm Among 4 Variants # 3. Choose Best Bayesian Model of Algorithm Returns # 4. Compare Bayesian Cones for all Algos and all Return Models # 5. Compare Best Algo Predicted Portfolio Ending Values # 6. Compare Best Algo Predicted CAGR Distributions # 7. Model Averaging # 8. Conclusion # ## Algo Evaluation Motivation # # When evaluating our trading systems there are two major areas of uncertainty we have to address: `Algorithm Uncertainty`, `Model Uncertainty`. # # ### Algorithm Uncertainty (AU) # * trade sequencing # * slippage/price impact # * network errors # * software errors # * hardware errors # # ### Model Uncertainty (MU) # * model is misspecified # * incorrect parameters # * changing market environment/nonstationarity # * missing variables # * etc. # # What you will notice is that the `AU` examples given involve issues that can occur once our algorithm is live. What that also means is that some combination of those issues _did not happen_ in the past but some combination of them _could happen_ in the future. How can we estimate the impact of random exogenous shocks to our trading strategy's performance? # # In this chapter we will employ a Bayesian methodology which will allow us to reasonably estimate the variation in strategy performance across many different return paths in an attempt to account for the exogenous shocks. The flip side to this approach is that we will have to use a model and all of the requisite assumptions. # # That introduces the second area of importance `MU`. To incorporate our uncertainty about which model is "best" we will create `3` different models and compare them. Then we will combine their predictions before we make our final inference and prediction. # *NOTE: in this chapter I have abstracted away some of the boilerplate code used to create and format data in the imported script `ch5_utils.py`. Some other functions and processes I have chosen to leave in the notebook for easier reference due to its importance in the analysis.* # In[1]: get_ipython().run_line_magic('load_ext', 'watermark') get_ipython().run_line_magic('watermark', '') get_ipython().run_line_magic('load_ext', 'autoreload') get_ipython().run_line_magic('autoreload', '2') # import standard libs from IPython.display import display from IPython.core.debugger import set_trace as bp from pathlib import PurePath, Path import sys import time from collections import OrderedDict as od import re import os import json os.environ['THEANO_FLAGS'] = 'device=cpu,floatX=float32' # get project dir pp = PurePath(Path.cwd()).parts[:-1] pdir = PurePath(*pp) script_dir = pdir / 'scripts' viz_dir = pdir / 'visuals' / '05_Algorithm_Evaluation' log_dir = pdir / 'data' / 'quantconnect_data' sys.path.append(script_dir.as_posix()) # import python scientific stack import pandas as pd pd.set_option('display.max_rows', 50) import numpy as np import scipy.stats as stats import math import pymc3 as pm from theano import shared, theano as tt import ffn # import visual tools import matplotlib as mpl import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') import seaborn as sns plt.style.use('seaborn-talk') plt.style.use('bmh') # import util libs from tqdm import tqdm import warnings warnings.filterwarnings("ignore") from utils import cprint from ch5_utils import pymc3_helper pmh = pymc3_helper() RANDOM_STATE = 777 today = pd.to_datetime('today').date() print()# get_ipython().run_line_magic('watermark', '-p pandas,numpy,pymc3,theano,sklearn,statsmodels,scipy,ffn,matplotlib,seaborn') # ## Read in backtest *portfolio equity* data. # # To do this I have defined some useful convenience functions to clean up the raw dataset. # # 1. Read in raw json # 2. Extract portfolio equity from json, rename columns, and parse time column # 3. Rename df columns # 4. Read in the properly formatted df # In[2]: def read_bt_json(fp): """fn: read Quantconnect backtest json""" with open(fp, encoding='utf-8') as f_in: return(json.load(f_in)) def extract_portfolio_equity(jdata): """fn: extract port equity timeseries from Quantconnect json""" d = jdata['Charts']['Strategy Equity']['Series']['Equity']['Values'] equity = (pd.DataFrame(d) .rename(columns=dict(x='time', y='equity')) .assign(time=lambda df: pd.to_datetime(df.time, utc=True, unit='s')) .set_index('time')) return equity def _get_column_name(text): """fn: to get column name as first text group""" #n = 4 # hardcoded based on fn structure groups = text.split('.') return '_'.join(groups[:-1]) def read_port_equity(fn): fp = PurePath(log_dir / fn)#.as_posix() jdata = read_bt_json(fp) # get column name col = _get_column_name(fn) # extract equity data equity = (extract_portfolio_equity(jdata) .rename(columns=dict(equity=col))) return equity # In[3]: #norm_2_cp_equity = read_port_equity('normal_2_thres_70cp.json') norm_2_cp_equity = read_port_equity('normal_2_thres_57_lev1_5x_rm.json') norm_2_60_equity = read_port_equity('normal_2_thres_60.json') norm_2_70_equity = read_port_equity('normal_2_thres_70.json') norm_2_80_equity = read_port_equity('normal_2_thres_80.json') cprint(norm_2_cp_equity) # Combine strategy dataframes into one df for easier analysis. # In[4]: list_of_dfs = [norm_2_cp_equity, norm_2_60_equity, norm_2_70_equity, norm_2_80_equity] dfs = (pd.concat(list_of_dfs, axis=1) .resample('D') .mean() # resample to average daily value .dropna(how='all')) cprint(dfs) # In[5]: # portfolio equity returns R = ffn.to_log_returns(dfs).dropna(how='all') cprint(R) # ## Choose the Best Algorithm Among 4 Variants # Now we can use a useful package called [ffn](https://github.com/pmorissette/ffn) to compute an assortment of performance statistics we can use to compare the algorithm portfolios. We're going to choose the best algorithm using the following metrics: # # * Total Return # * Daily Sharpe # * CAGR # * Calmar Ratio # In[6]: perf = ffn.calc_stats(dfs) # We can view the portfolio cumulative returns indexed to 100 easily # In[7]: perf.plot() # Next we display the performance statistics. # In[8]: perf.display() # Among the four algorithms, according to the four metrics listed above the clear "winner" is the `normal_4` algo variant. This refers to the algorithm version that uses `4` mixture components and predicts the return distribution by sampling from the `normal` distribution. # # **Updated**: Best variant is the cherry picked version using a little bit of leverage and a long-term holding period. # ## Choose Best Bayesian Model of Algorithm Returns # # In this section we address the dual issues of **algorithm uncertainty** and **model uncertainty**. # # ### Model Sampling and Comparison # # In this next section we model portfolio equity returns using 3 different distributions: `normal`, `laplace`, `student T`. # Then we compare the models. # # $$ Y = \mu + \sigma $$ # # Here's the `Normal` specification: # # $\mu \sim N(0,1)$ # # $\sigma \sim HalfCauchy(1)$ # # $Y \sim N(\mu, \sigma)$ # # Here's the `Laplace` specification: # # $\mu \sim N(0,1)$ # # $\sigma \sim HalfCauchy(1)$ # # $Y \sim Laplace(\mu, \sigma)$ # # Here's the `Student T` specification: # # $\nu \sim exp(.1)$ # # $\mu \sim N(0,1)$ # # $\sigma \sim HalfCauchy(1)$ # # $Y \sim StudentT(\nu+2, \mu, \sigma)$ # In[11]: ## Code adapted from: https://github.com/quantopian/pyfolio/blob/master/pyfolio/bayesian.py ## def normal_model(R, samples, name): """fn: sample normal model of strategy returns params ------ R: pd.Series() simple returns ts samples: int() name: str(), model name returns ------- model, trace """ with pm.Model(name=name) as model: mu = pm.Normal('mean rets', mu=0, sd=.01, testval=R.mean()) sigma = pm.HalfCauchy('vol', beta=1, testval=R.std()) returns = pm.Normal('returns', mu=mu, sd=sigma, observed=R) pm.Deterministic( 'annual mean returns', returns.distribution.mean * 252) pm.Deterministic( 'annual volatility', returns.distribution.variance**.5 * np.sqrt(252)) pm.Deterministic( 'sharpe', returns.distribution.mean / returns.distribution.variance**.5*np.sqrt(252)) step = pm.NUTS() trace = pm.sample(samples, tune=samples, step=step) return model, trace def laplace_model(R, samples, name): """fn: sample laplace model of strategy returns params ------ R: pd.Series() simple returns ts samples: int() name: str(), model name returns ------- model, trace """ with pm.Model(name=name) as model: mu = pm.Normal('mean rets', mu=0, sd=.01, testval=R.mean()) sigma = pm.HalfCauchy('vol', beta=1, testval=R.std()) returns = pm.Laplace('returns', mu=mu, b=sigma, observed=R) pm.Deterministic( 'annual mean returns', returns.distribution.mean * 252) pm.Deterministic( 'annual volatility', returns.distribution.variance**.5 * np.sqrt(252)) pm.Deterministic( 'sharpe', returns.distribution.mean / returns.distribution.variance**.5*np.sqrt(252)) step = pm.NUTS(target_accept=.9) trace = pm.sample(samples, tune=samples, step=step) return model, trace def student_model(R, samples, name): """fn: sample student T model of strategy returns params ------ R: pd.Series() simple returns ts samples: int() name: str(), model name returns ------- model, trace """ with pm.Model(name=name) as model: nu = pm.Exponential('nu_minus_two', 1. / 10., testval=3.) mu = pm.Normal('mean rets', mu=0, sd=.01, testval=R.mean()) sigma = pm.HalfCauchy('vol', beta=1, testval=R.std()) returns = pm.StudentT('returns', nu=nu+2, mu=mu, sd=sigma, observed=R) pm.Deterministic( 'annual mean rets', returns.distribution.mean * 252) pm.Deterministic( 'annual volatility', returns.distribution.variance**.5*np.sqrt(252)) pm.Deterministic( 'sharpe', returns.distribution.mean / returns.distribution.variance**.5*np.sqrt(252)) step = pm.NUTS(target_accept=.9) trace = pm.sample(samples, tune=samples, step=step) return model, trace def run_models_traces(r, samples=2_000): """fn: to run multiple models using algo returns params ------ r: shared theano array example: shared(R['normal_4'].values) returns ------- models: ordereddict with model outputs traces: ordereddict with trace outputs """ # get model, traces norm_model, norm_trace = normal_model(r, samples, 'normal_model') la_model, la_trace = laplace_model(r, samples, 'la_model') t_model, t_trace = student_model(r, samples, 't_model') # ordered dict is required to ensure insertion order # python 3.7 dict will implement insertion order feature by default models = od(norm_model=norm_model, la_model=la_model, t_model=t_model) traces = od(norm_trace=norm_trace, la_trace=la_trace, t_trace=t_trace) compareDict = {norm_model:norm_trace, la_model:la_trace, t_model:t_trace} return models, traces, compareDict # For this portion of the analysis we will focus on the best algorithm variant `normal_4`. # In[12]: best_algo_variant = 'normal_2_thres_57_lev1_5x_rm' #'normal_2_thres_70cp' r = R[best_algo_variant].values models, traces, compareDict = run_models_traces(r) # In[13]: df_comp_waic = (pm.compare(compareDict, method='stacking')) df_comp_waic # In[14]: fig, ax = plt.subplots() pm.compareplot(df_comp_waic, ax=ax); # The best model is the `t_model` as it has the smallest deviance critierion. We can look at its traceplots below. # In[15]: rvs = pmh.get_model_varnames(models['t_model']) pmh.plot_traces_pymc(traces['t_trace'], varnames=rvs) # ## Comparing Bayesian Cones for all Algos and all Return Models # # Even though we already know which model best describes the data, let's compare these algo variants and models a little more, in order to better understand how the model choices affect our algorithm performance predictions. # # To demonstrate this, in the next section we will sample from each of the 3 models for each of the 4 algorithm variants and output their predicted performance in the form of a [bayesian cone](https://blog.quantopian.com/bayesian-cone/). I first became aware of this concept via Thomas Wiecki at Quantopian. Simply put it is a Bayesian methodology to predict the probable return paths of the algorithm in question. Their primary use case was trying to determine if an algorithm was overfitted to its sample period by comparing its performance since going **live** vs its **backtest** performance. Once an algorithm is **live** if it severely underperforms the sampled bayesian credible intervals then it is likely overfitted. # # However in this use case we are going to adapt the methodology to simply predict the algo's likely return paths (aka future performance) and our uncertainty concerning the predicted performance. # # First we are going to make a prediction dictionary to hold the following information for each model: # # * `name`: this is the string name of the model and the dictionary key. # * `ppc_samples`: this is the raw `pm.sample_ppc()` output. # * `ppc`: this is a formatted df of the `ppc_samples` output. # * `cuml_df`: this is a formatted df of *predicted* cumulative returns. # In[20]: preds = {} for algo_name in R.columns: print('-'*77) print(f'sampling algorithm variant: {algo_name}') print() tmp_dict = {} tmp_r = shared(R[algo_name].dropna().values) tmp_models, tmp_traces, tmp_compareDict = run_models_traces(tmp_r) for model, trace in tmp_compareDict.items(): ppc_samples = pm.sample_ppc(trace, samples=252*2, model=model, size=tmp_r.eval().shape[0]) #ppc_samples = pm.sample_posterior_predictive(trace, # samples=252*2, # model=model, # size=tmp_r.eval().shape[0]) tmp_df = pd.DataFrame(ppc_samples[f'{model.name}_returns']) tmp_df.index = pd.date_range(pd.to_datetime('today').date(), periods=tmp_df.shape[0], freq='D') ppc = pmh.make_ppc_df(tmp_df) cuml_df = ppc.cumsum() tmp_dict[model.name] = dict(ppc_samples=ppc_samples, ppc=ppc, cuml_df=cuml_df) preds[algo_name] = tmp_dict # In this section we are adapting the [bayesian cone](https://github.com/quantopian/pyfolio/blob/master/pyfolio/bayesian.py) code from [pyfolio](https://github.com/quantopian/pyfolio) for our use case below. # # 1. The first function `make_train_test()` is creating the train and test splits needed for the bayesian cone plot. In the original use case this is a little simpler because we would simply be splitting the algorithm return series at the date when the algorithm is **live**. In this custom implementation we are using *all* of the algorithm's *cumulative returns + 1*, as the **train** set. The **test** set is the *predicted mean* of the sampled cumulative returns scaled by the last return of the train set. # # 2. The function `compute_bayes_cone()` is calculating the percentile scores, or returns at the specified percentiles. # # 3. The function `plot_bayes_cone()` plots a single algorithm's predicted performance along with credible intervals we computed `compute_bayes_cone()` # # 4. The function `plot_bayes_cone_grid()` is simply a wrapper function to plot each of the `4` algorithm variants with each of the `3` models for easy comparison # In[ ]: ## Code adapted from: https://github.com/quantopian/pyfolio/blob/master/pyfolio/bayesian.py ## def make_train_test(returns, cuml_df): """fn: to make train test dfs Params ------ returns: pd.Series() cuml_df: df of simulated cuml returns Returns ------- train, test: pd.Series(), pd.DataFrame() """ t = returns.cumsum().add(1) t_test = cuml_df['mean_sim_path'].add(1) * t.iloc[-1] t_test.index = t_test.index.tz_localize('utc') return t, t_test def compute_bayes_cone(preds, starting_value=1.): """ Compute 5, 25, 75 and 95 percentiles of cumulative returns, used for the Bayesian cone. Params ------ preds : numpy.array Multiple (simulated) cumulative returns. starting_value : int (optional) Have cumulative returns start around this value. Default = 1. Returns ------- dict of percentiles over time Dictionary mapping percentiles (5, 25, 75, 95) to a timeseries. """ def scoreatpercentile(cum_preds, p): return [stats.scoreatpercentile( c, p) for c in cum_preds.T] cum_preds = np.cumprod(preds + 1, 1) * starting_value perc = {p: scoreatpercentile(cum_preds, p) for p in (5, 25, 75, 95)} return perc def plot_bayes_cone(train, test, algo_name, model_name, percentiles, ax=None): """fn: plot bayes cone using train 'test' split""" if ax is None: ax = plt.gca() t = train t_rel = test t.loc[t_rel.index[0]] = t_rel.iloc[0] t.plot(ax=ax, color='g', label='in-sample') t_rel.plot(ax=ax, color='r', label='future estimate') ax.fill_between(t_rel.index, percentiles[5], percentiles[95], alpha=.3) ax.fill_between(t_rel.index, percentiles[25], percentiles[75], alpha=.6) ax.legend(loc='upper left', frameon=True, framealpha=0.5) ax.set_title(f'{algo_name}::{model_name}') ax.set_xlabel('') ax.set_ylabel('Cumulative returns') ax.set_xlim(t.index[0], t_rel.index[-1]) def plot_bayes_cone_grid(preds, R, model_labels): algo_names = R.columns n = len(model_labels) m = len(algo_names) fig, axes = plt.subplots(m,n, figsize=(15,15)) for i, algo_name in enumerate(algo_names): for j, model_label in enumerate(model_labels): r = R[algo_name] tmp_c_df = preds[algo_name][model_label]['cuml_df'] train, test = make_train_test(r, tmp_c_df) tmp_ppc_samples = preds[algo_name][model_label]['ppc_samples'] # extract subsample before computing cone df_t = (pd.DataFrame(tmp_ppc_samples[f'{model_label}_returns']) .sample(len(tmp_ppc_samples[f'{model_label}_returns']), axis=1) .values) perc = compute_bayes_cone(df_t, starting_value=train.iloc[-1]) ax = axes[i,j] plot_bayes_cone(train, test, algo_name, model_label, perc, ax=ax) plt.suptitle('Bayesian cone || algo_name::model_name', fontsize=14, fontweight='medium',) plt.tight_layout() fig.subplots_adjust(top=.92) today=pd.to_datetime('today').date() save_pth = PurePath(viz_dir/f'bayesian_cones_comparison_{today}.png').as_posix() fig.savefig(save_pth, dpi=300, bbox_inches='tight') # Now let's examine the results. # In[ ]: model_labels = ['normal_model', 'la_model', 't_model'] ## plot plot_bayes_cone_grid(preds, R, model_labels) # ### Some observations: # # * If we look at the last column which features all 4 algo variants paired with the `t_model` we can see that compared to the other columns the `t_model` consistently has the widest dispersion of predicted return paths. This is somewhat expected given that the Student T model is supposed to incorporate wider tails. # # * Looking at the `la_model` column we can see that it consistently estimates the predicted return path to hover very close to zero on average no matter what the performance of the of the algorithm was. This is also expected because the `Laplace` distribution is made by double exponential functions that concentrate the mass of the distribution around 0. The `la_model` also seems to have the smallest range in the cone compared to the others. # # ## Compare Best Algo's Predicted Portfolio Ending Values # ## Compare Best Algo Predicted CAGR Distributions # # In the next section we will quantify our observations about the bayesian cone widths. We will again disregard all the other algorithm variants and use our `best_algo_variant` which is the `normal_4` algo to compare each of the `3` models and the width's of their bayesian cones at the **end** of the prediction period. To do this we will be using the distribution of the simulated portfolio ending values and comparing their `5, 50, 95` percentile values. # # This analysis will also allow us to compare estimated CAGRs of the `best_algo_variant` for each of the `3` return models. # First let's look at a quick sample of the predicted cumulative return paths of our best algorithm with the "best" model of returns. # In[ ]: ex_cuml_df = preds[best_algo_variant]['t_model']['cuml_df'] fig, ax = plt.subplots(figsize=(15,5)) ex_cuml_df.sample(150, axis=1).plot(legend=False, ax=ax, x_compat=True) xmin, xmax = ex_cuml_df.index.min(), ex_cuml_df.index.max() ax.set_xlim((xmin, xmax)) save_pth=PurePath(viz_dir/f'best_algo_t_model_simulated_paths_sample_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) # In this section we are going to extract the simulated portfolio ending values using the best algo variant `normal_2_thres_70cp` and for each return model and we're going to compare the ending value distributions. # # We will also compute **CAGR's** for each simulated return path and summarize the distribution of expected CAGRs using the `5, 50, 95` percentiles. # In[ ]: n_cols = 5000 quantiles = [0.05,.5,.95] for model in model_labels: ## get ppc, paths and end vals tmp_ppc = preds[best_algo_variant][model]['ppc'] sim_path_df = pmh.get_paths(tmp_ppc, n_cols) sim_end_val = pmh.get_end_vals(sim_path_df) ## plot sim port end vals fig, axes = plt.subplots(1,2, figsize=(15,3)) pmh.plot_port_end_dist(sim_end_val, axes, model_name=model) save_pth = PurePath(viz_dir/f'best_algo_{model}_simulated_ending_values_dist_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) ## plot cagr bar cagr = ffn.calc_cagr(sim_path_df) step = .01 pcts = np.arange(0,1+step,step) pcts = np.round(pd.Index(pcts),2) ser = cagr.quantile(pcts) fig, ax = plt.subplots(figsize=(15,3)) pmh.plot_cagr_bar(ser, quantiles, ax, model_name=model) save_pth = PurePath(viz_dir/f'best_algo_{model}_simulated_cagr_perc_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) # ## Model Averaging # # So now we have `3` return models. Our previous `WAIC` based decision criterion told us that the `t_model` does the best job of modeling the algorithm returns. But as we can see there are other potentially credible models that hold information that could help improve our predictions. How can we incorporate other useful models in our predictions? # # Lucky for us `pymc3` has made the process straightforward. In section `3` we compared our `3` models using `pm.compare()`. The output dataframe has a column of `weights` which we can *"vaguely interpret as the probability that each model will make the correct predictions on future data."* # # Next we can use the function `pm.sample_ppc_w()` to incorporate those weights. Make sure the weights are ordered identically to the traces and models we constructed above. # In[ ]: df_comp_waic # In[ ]: def make_stacked_model_df(traces, models, weights, samples=500, seed=0): """fn: sample wtd ppc Params ------ traces: list of traces models: list of models weights: pd.Series of weights Returns ------- ppc_df: df of wtd ppc samples """ cols = [] for i in tqdm(range(samples)): ppc_w = pm.sample_ppc_w(traces=traces, samples=252*2, models=models, weights=weights, random_seed=seed+i, # reproducible progressbar=False) ppc_w_data = (ppc_w[k] for k in list(ppc_w.keys())) col = pd.Series(np.hstack(ppc_w_data)) cols.append(col) ppc_df = (pd.concat(cols,axis=1)) ppc_df.index = pd.date_range(pd.to_datetime('today').date(), periods=ppc_df.shape[0], freq='D') ppc_df = ppc_df.assign(mean_sim_port=lambda df: df.mean(1)) return ppc_df traces = list(compareDict.values()) models = list(compareDict.keys()) weights = df_comp_waic.weight.sort_values(ascending=True) ppc_df = make_stacked_model_df(traces, models, weights) cprint(ppc_df) # In[ ]: n_cols = 500 quantiles = [0.05,.5,.95] ## get ppc, paths and end vals sim_path_df = pmh.get_paths(ppc_df, n_cols) sim_end_val = pmh.get_end_vals(sim_path_df) ## plot sim port end vals fig, axes = plt.subplots(1,2, figsize=(15,3)) pmh.plot_port_end_dist(sim_end_val, axes, model_name='stacked_model') save_pth = PurePath(viz_dir/f'stacked_model_simulated_ending_values_dist_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) ## plot cagr bar cagr = ffn.calc_cagr(sim_path_df) step = .01 pcts = np.arange(0,1+step,step) pcts = np.round(pd.Index(pcts),2) ser = cagr.quantile(pcts) fig, ax = plt.subplots(figsize=(15,3)) pmh.plot_cagr_bar(ser, quantiles, ax, model_name='stacked_model') save_pth = PurePath(viz_dir/f'stacked_model_simulated_cagr_perc_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) # What if we try our own arbitrary weights? # In[ ]: new_weights = [0.32, 0.34, 0.34] new_ppc_dfs = make_stacked_model_df(traces, models, new_weights) cprint(new_ppc_dfs) # In[ ]: n_cols = 500 quantiles = [0.05,.5,.95] ## get ppc, paths and end vals sim_path_df = pmh.get_paths(new_ppc_dfs, n_cols) sim_end_val = pmh.get_end_vals(sim_path_df) ## plot sim port end vals fig, axes = plt.subplots(1,2, figsize=(15,3)) pmh.plot_port_end_dist(sim_end_val, axes, model_name='alt_stacked_model') save_pth = PurePath(viz_dir/f'alt_stacked_model_simulated_ending_values_dist_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) ## plot cagr bar cagr = ffn.calc_cagr(sim_path_df) step = .01 pcts = np.arange(0,1+step,step) pcts = np.round(pd.Index(pcts),2) ser = cagr.quantile(pcts) fig, ax = plt.subplots(figsize=(15,3)) pmh.plot_cagr_bar(ser, quantiles, ax, model_name='alt_stacked_model') save_pth = PurePath(viz_dir/f'alt_stacked_model_simulated_cagr_perc_{today}.png').as_posix() fig.savefig(save_pth, dpi=300) # ## Conclusions # # Using our `stacked_model` with the original pymc3 generated weights we can interpret the results as so; Trading the `normal_2_thres_70cp` algorithm variant, we would expect over the next `504 trading days` or approximately `2 years` that our portfolio equity will be between `~$92K` and `~$114K` with an expected value of `~$103K`. The predicted CAGR is estimated to be between `-5%` and `+10%` with an expected value of `2.0%`. # In[ ]:

Table of Contents