This takes a leveraged ETF and extends it back into the past using a proxy fund.
The basic idea is to multiply the daily returns of the proxy by the leverage factor, adjusting for fees and other expenses. Since some of those expenses are hard to obtain, it can also find the parameters that minimize the difference between the leveraged proxy and the actual leveraged ETF. It plots a telltale chart with difference metrics and writes the simulated prices out to CSV.
If you are new to Jupyter Notebook, you can find tutorials online. If you are not already doing so, you can edit and run this notebook interactively on Binder.
import sys
import warnings
import unittest
from itertools import chain, combinations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance
from scipy.optimize import minimize
from IPython.core.display import display, HTML
from IPython.display import Image
from util import yget, read_fred, annret, annvol, cumret, TRADING_DAYS
# Require Python 3.6+; per the message, presumably because later code relies on dict insertion order.
assert sys.version_info >= (3, 6, 0), "Ordered dicts are where it's at"
# Widen the notebook container to 90% of the page so wide charts and tables fit.
display(HTML("<style>.container { width:90% !important; }</style>"))
# Set to plotly for interactive figures, matplotlib for static images
pd.options.plotting.backend = "plotly"
if pd.options.plotting.backend == "plotly":
    # Only import plotly when it is the selected backend, then pick a light default template.
    import plotly.io
    plotly.io.templates.default = "plotly_white"
RASTER = True # Rasterize complex images to save time/space
# IPython line magic (not plain Python): show matplotlib figures inline in the notebook.
%matplotlib inline
plt.rcParams["figure.figsize"] = (16, 9) # Matplotlib likes this in a separate cell
Leveraged funds have to borrow money, and the borrowing cost is not included in the expense ratio, so we have to account for it. We use the Effective Federal Funds Rate because it has history back to 1954 and gives slightly better fits than the LIBOR.
# Source is in percent, so divide by 100 to get a fraction.
fedfunds = read_fred('DFF').rename('FEDFUNDS') / 100
# Alternative short-term rate benchmarks, left disabled; uncomment one and assign it to BORROW to experiment.
#tbill = read_fred('DTB3').rename('TBILL') / 100
#libor1d = read_fred('USDONTD156N').rename('LIBOR-1d') / 100
#libor1w = read_fred('USD1WKD156N').rename('LIBOR-1w') / 100
#libor1m = read_fred('USD1MTD156N').rename('LIBOR-1m') / 100
#libor12m = read_fred('USD12MD156N').rename('LIBOR-12m') / 100
# Default borrowing-rate series; used as the default `borrow_rate` argument of the leverage/fitting functions.
BORROW = fedfunds
This function leverages a proxy price series using configurable leverage factor, expense ratio, and borrowing rate.
As a first pass, to leverage a daily return $ret$ by a leverage factor $factor$, we just scale the return and subtract the (daily) expense ratio $exp$:
$$ lev = factor * ret - exp $$
However, the fund's borrowing and trading costs are not included in the expense ratio, so we subtract borrowing costs from the leveraged return. A 3X fund needs to borrow an additional 2X the principal, broadly speaking, so in general a fund pays $factor - 1$ times the borrowing costs. Borrowing costs vary over time, so we use a short-term interest rate benchmark such as the Federal Funds Rate or LIBOR.
Under the hood, leveraged ETFs hold some stock and some swaps; the main unknowns in simulating an ETF are the fraction of assets in swaps and the swap rate (some exploration here and here).
To account for these unknowns, we add an adjustment $C$, to be determined for each ETF.
$$ lev = factor * ret - exp - (factor - 1) * borrow + C $$
$C$ is a constant found by curve-fitting, usually quite small.
Finally, the expense ratio and borrowing costs are annual figures, so to get daily values we divide by the periods per year.
def leverage(prices, factor=2, expense=0.01, borrow_rate=BORROW, ann_periods=TRADING_DAYS,
             factor_scale=1.0, factor_incr=0, borrow_scale=1.0, expense_incr=0):
    """:Return: a Series giving the daily leveraged value of `prices` at a given leverage `factor`.

    This is basically the per-period change in prices minus the expense ratio and borrow rate.
    The expense ratio and borrow rate are divided (evenly, arithmetically) by `ann_periods`.
    Per period, the growth ratio is::

        1 + pct_change * factor * factor_scale
          - (expense + expense_incr + borrow_rate * borrow_scale * (factor + factor_incr - 1)) / ann_periods

    The result starts at the first value of `prices` and is named ``"<prices.name>:<factor>X"``
    (factor rounded to 3 decimals).

    :param prices: Prices of the unleveraged proxy; converted to a float Series. Must not contain NaN.
    :param float factor: The leverage factor by which daily returns are multiplied.
    :param float expense: Net expense ratio per `ann_periods` as a fraction. Deducted proportionally from each period.
        Example: 0.0095 for a 0.95% annual expense ratio.
    :param borrow_rate: The (annualized) interest rate used to finance short-term borrowing for leverage, either a
        scalar or a Series. A Series is forward-filled onto the index of `prices`. Deducted from daily returns.
        Typically the daily Federal Funds Rate or LIBOR. The caller's object is never modified.
    :param int ann_periods: The number of periods over which rates are given. E.g., 252 for daily periods in a typical trading year.
    :param float factor_scale: Multiplier applied to `factor` when scaling returns (not when computing borrowing).
    :param float factor_incr: Added to `factor` when computing the borrowed multiple ``factor + factor_incr - 1``.
    :param float borrow_scale: Multiplier applied to `borrow_rate`.
    :param float expense_incr: Added to `expense`; acts as the additive constant "C" of the model.

    The `_scale` and `_incr` parameters are adjustments to the corresponding parameters found through curve-fitting.

    :raises ValueError: if `prices` or a Series `borrow_rate` contains NaN, or if `prices` starts before a
        Series `borrow_rate` does.
    """
    prices = pd.Series(prices, dtype=float)
    if prices.isna().any():
        raise ValueError('NaN in prices')
    if isinstance(borrow_rate, pd.Series):
        if borrow_rate.isna().any():
            raise ValueError('NaN in borrow_rate')
        if prices.index[0] < borrow_rate.index[0]:
            raise ValueError(f'Prices start {prices.index[0]}, before borrow_rate {borrow_rate.index[0]}')
        # Align borrow rates with prices, carrying the last known rate forward over missing dates
        borrow_rate = borrow_rate.reindex(index=prices.index, method='ffill')
    name = f'{prices.name or ""}:{round(factor, 3)}X'

    # Curve-fitting adjustments. Deliberately NOT in-place (`*=`): an in-place multiply would modify a
    # caller-supplied array and fails outright on integer arrays.
    borrow_rate = borrow_rate * borrow_scale
    # This functions as an additive constant for the whole equation, since exp isn't scaled ("C" above)
    expense = expense + expense_incr

    change = prices.pct_change() * factor * factor_scale + 1  # Period-to-period changes as ratios
    # Changes less expenses and borrowing costs, evenly distributed among periods
    net_change = change - (expense + borrow_rate * (factor + factor_incr - 1)) / ann_periods
    net_change.iat[0] = prices.iat[0]  # Start leveraged series at same value so it's easily comparable
    leveraged = net_change.cumprod()
    return leveraged.rename(name)
# All possible leverage() parameters for curve fitting and their ranges.
# Each value is a (lower, upper) pair; find_params() passes these as optimizer bounds and starts from their midpoint.
ALL_LEV_PARAMS = {'factor_scale': (0, 3), 'factor_incr': (-2, 2), 'borrow_scale': (-5, 5), 'expense_incr': (-2, 2)}
# Actually used leverage() parameters, determined in the 'Model Selection' section
LEV_PARAMS = {p: ALL_LEV_PARAMS[p] for p in ('expense_incr',)}
Quick test to make sure we're doing something right
def deleverage(prices, factor, expense, borrow, ann_periods=TRADING_DAYS):
    """:Return: the unleveraged price series implied by leveraged `prices`, starting at 1.0.

    Inverse of the basic leverage model: add the per-period expense and borrowing cost back to each
    return, then divide by `factor`. Assumes no fudge factors (no `_scale` / `_incr` adjustments).
    """
    period_cost = (expense + borrow * (factor - 1)) / ann_periods
    growth = (prices.pct_change() + period_cost) / factor + 1
    growth.iat[0] = 1.0  # First period has no return; anchor the series at 1.0
    return growth.cumprod()
class LeverageTest(unittest.TestCase):
    """Round-trip check: deleveraging a leveraged series must reproduce the original prices."""

    def test_leverage(self):
        # Each vector is a sequence of per-period growth ratios (1.01 == +1%).
        vecs = (
            [1.0] * 5,
            [1.01] * 5,
            np.arange(1.0, 1.1, 0.01),
            # Falling prices. The step must be negative: a positive step with stop < start gives an
            # empty array, which would make this case pass vacuously.
            np.arange(1.0, 0.9, -0.01),
        )
        for vec in vecs:
            rets = pd.Series(vec)
            # Prices start at 1.0 followed by the compounded returns. pd.concat replaces
            # Series.append, which was removed in pandas 2.0.
            prices = pd.concat((pd.Series(1.0), rets.cumprod()), ignore_index=True)
            for factor in (1, 2, 3, 1.25):
                for expense in (0.0, 0.01, 0.001, -0.01):
                    for borrow in (0.0, 0.01, 0.02):
                        lev = leverage(prices, factor, expense, borrow)
                        delev = deleverage(lev, factor, expense, borrow)
                        pd.testing.assert_series_equal(prices, delev, check_names=False)


unittest.TextTestRunner().run(unittest.TestLoader().loadTestsFromTestCase(LeverageTest));
. ---------------------------------------------------------------------- Ran 1 test in 0.439s OK
def norm(prices):
    """:Return: `prices` rescaled so that the first row equals 1.0."""
    first = prices.iloc[0]
    return prices / first
def cat(*dfs, dropna=True):
    """:Return: the Series/DataFrames in `dfs` joined side by side (one column per input column).

    :param bool dropna: If True, drop every row that has a NaN in any column.
    """
    joined = pd.concat(dfs, axis=1)
    return joined.dropna() if dropna else joined
def align(*prices, dropna=True, norm=False):
    """:Return: The `prices` Series with only the dates in common to all of them, as a sequence (tuple),
    one Series per column of the concatenated inputs.

    :param bool dropna: If True, drop rows where any series has NaN, leaving only the common dates.
    :param bool norm: If True, normalize each series of prices to start at 1.0.
    """
    aligned = cat(*prices, dropna=dropna)
    if norm:
        # The `norm` parameter shadows the module-level norm() function, so look the function up by name.
        aligned = globals()['norm'](aligned)
    # DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent.
    return tuple(col for _, col in aligned.items())
def telltale(reference, *dfs, **layout_kws):
    """Plot the growth of several dataframes or series `dfs` relative to a `reference` series.

    All inputs are restricted to their common dates, normalized to start at 1.0, and divided by the
    reference, so a perfect match is a flat line at 1.0.
    https://www.bogleheads.org/wiki/Telltale_chart

    :param layout_kws: Extra keyword arguments for the figure's `update_layout` (plotly backend only).
    :raises ValueError: if the combined column names are not unique.
    :Return: None. With the plotly backend the figure is shown as a side effect.
    """
    tell = norm(cat(reference, *dfs))
    if tell.columns.nunique() < len(tell.columns):
        raise ValueError('Column names must be unique.')
    # Divide each column by the reference (first) column, row by row. Plain `tell / reference` would align
    # the reference's index against the column labels, so the row-wise axis must be given explicitly.
    tell = tell.div(tell.iloc[:, 0], axis=0)
    # Column labels may be non-strings (e.g. integers for unnamed Series), so stringify them for the title.
    fig = tell.plot(title=f'Telltale Chart: {", ".join(map(str, tell.columns))}')
    if pd.options.plotting.backend == "plotly":
        fig.update_layout(margin=dict(t=50), **layout_kws).show()
def plotret(*prices, title=None):
    """Plot cumulative returns of `prices` (common dates, each starting at 0%) and return the styled figure.

    Uses `update_layout`, so it expects the plotly plotting backend.
    """
    cumulative = norm(cat(*prices)).sub(1)
    figure = cumulative.plot(title=title)
    layout = dict(
        yaxis=dict(tickformat=".0%"),
        margin=dict(t=50),
        legend_title_text='',
        yaxis_title='Cumulative Return',
        width=950,
        height=450,
    )
    return figure.update_layout(**layout)
def color_leverage(factor, alpha=1.0, max_factor=3):
    """:Return: a plotly color string for a given leverage `factor`, more green for more long, more red for more short.

    :param float factor: Leverage factor; the sign selects green (>= 0) or red (< 0).
    :param float alpha: Opacity written into the ``rgba(...)`` string as-is.
    :param float max_factor: The |factor| that maps to full intensity (255). Larger factors are clamped
        to 255 so the result is always a valid color.
    """
    # Clamp to the valid 0..255 channel range; without this, |factor| > max_factor yields e.g. 510.
    intensity = max(0, min(255, int(abs(factor) / max_factor * 255)))
    channels = f'0,{intensity},0' if factor >= 0 else f'{intensity},0,0'
    return f'rgba({channels},{alpha})'
def rasterize(figure, raster=False, width=1100, height=600, filename=None):
    """Optionally turn a plotly `figure` into a static image, to save space and time.

    With `raster` false the figure is returned untouched. Otherwise it is rendered with kaleido:
    written to `filename` and referenced by URL (with a random cache-busting query) when a filename
    is given, or embedded as in-memory PNG bytes when not.
    """
    if not raster:
        return figure
    if not filename:
        png = figure.to_image(format="png", width=width, height=height, scale=1, engine="kaleido")
        return Image(png)
    figure.write_image(filename, width=width, height=height, scale=1, engine="kaleido")
    return Image(url=f'{filename}?cache_bust={np.random.randint(100000)}')
def splice(old, new):
    """Splice together `new` prices with `old` prices before them, adjusted so new prices don't change.

    The old prices are rescaled by the old/new price ratio at the splice point (the first date of `new`,
    or the closest earlier date in `old` if there is no exact match). Only `old` rows strictly before
    that splice position are kept.

    :Return: the combined Series, named like `new`. If `old` starts after `new`, a warning is issued and
        `new` is returned unchanged.
    :raises ValueError: if `old` ends before `new` begins (no overlap).
    """
    if old.index[-1] < new.index[0]:
        raise ValueError(f'Last old index {old.index[-1]} and first new index {new.index[0]} must overlap')
    if old.index[0] > new.index[0]:
        warnings.warn(f'Old has no data older than new; old starts {old.index[0]}, new starts {new.index[0]}')
        return new
    # Position in `old` of new's first date, or of the previous date if there is no exact match.
    # Index.get_loc(..., method=...) was removed in pandas 2.0; get_indexer is the supported equivalent.
    # The guards above guarantee a match exists, so this is never -1.
    first = int(old.index.get_indexer([new.index[0]], method='ffill')[0])
    ratio = old.iloc[first] / new.iloc[0]
    return pd.concat((old.iloc[:first] / ratio, new), verify_integrity=True).rename(new.name)
def rmse(a, b):
    """:Return: the root-mean-square of the elementwise difference ``a - b``."""
    diff = a - b
    mean_sq = np.mean(diff ** 2)
    return np.sqrt(mean_sq)
def mae(a, b):
    """:Return: the mean of the absolute elementwise difference ``a - b``."""
    abs_err = np.abs(a - b)
    return np.mean(abs_err)
def rel_rmse(a, b):
    """RMSE of the ratio ``b / a`` against 1.0, after aligning both series and normalizing them to start at 1.0."""
    base, other = align(a, b, norm=True)
    ratio = other / base
    return rmse(1.0, ratio)
def rel_mae(a, b):
    """MAE of the ratio ``b / a`` against 1.0, after aligning both series and normalizing them to start at 1.0."""
    base, other = align(a, b, norm=True)
    ratio = other / base
    return mae(1.0, ratio)
def return_rmse(a, b):
    """RMSE between the simple per-period returns of price series `a` and `b`, on their common dates."""
    returns = (series.pct_change() for series in (a, b))
    ret_a, ret_b = align(*returns)
    return rmse(ret_a, ret_b)
def return_mae(a, b):
    """MAE between the simple per-period returns of price series `a` and `b`, on their common dates."""
    returns = (series.pct_change() for series in (a, b))
    ret_a, ret_b = align(*returns)
    return mae(ret_a, ret_b)
# Note: this metric depends heavily on the most recent price.
def cumret_diff(a, b):
    """Absolute gap between the cumulative returns of `a` and `b` over their common dates."""
    left, right = align(a, b)
    gap = cumret(left) - cumret(right)
    return abs(gap)
def errstats(reference, leveraged, ann_periods=TRADING_DAYS):
    """:Return: dict of error metrics comparing the actual `leveraged` series with the expected `reference`
    series, computed only over the dates (indices) present in both.

    Keys, in order: RMSE, MAE (relative price ratio errors), RETRMSE, RETMAE (per-period return errors),
    CAGR, VOL (leveraged minus reference annualized return / volatility) and P99 (99th percentile of the
    absolute relative deviation of the normalized series).
    """
    ref, lev = align(reference, leveraged)
    deviation = norm(lev).div(norm(ref)).sub(1).abs()
    stats = {}
    stats['RMSE'] = rel_rmse(ref, lev)
    stats['MAE'] = rel_mae(ref, lev)
    stats['RETRMSE'] = return_rmse(ref, lev)
    stats['RETMAE'] = return_mae(ref, lev)
    stats['CAGR'] = annret(lev, ann_periods) - annret(ref, ann_periods)
    stats['VOL'] = annvol(lev, ann_periods) - annvol(ref, ann_periods)
    stats['P99'] = deviation.quantile(.99)
    return stats
def roundvals(d, digits=4):
    """:Return: a new dict with the same keys as `d` and every value rounded to `digits` digits."""
    rounded = {}
    for key, value in d.items():
        rounded[key] = round(value, digits)
    return rounded
These functions find the leverage parameters that minimize the error between a reference Series and a leveraged proxy, and plot the results along with error metrics.
There is the question of which error metric to optimize. The relative RMSE, basically how well the telltale chart aligns, seems to do the best job of minimizing all metrics (cumulative and simple, squared and absolute) across funds. The relative RMSE takes the simulated prices divided by the actual prices and computes the RMSE between that ratio and 1.0, which is what the ratio would be if they matched perfectly.
We can get away with local minimization here (as opposed to global) because the leverage function for a single day w.r.t. the leverage fit parameters is convex nonnegative increasing, the product of such functions (i.e. the cumulative return) is convex, and norms like RMSE are also convex.
def find_params(reference, proxy, factor=2, expense=0.01, borrow_rate=BORROW, ann_periods=TRADING_DAYS,
                params=LEV_PARAMS, errfunc=rel_rmse):
    """Find `params` that minimize the error between a `reference` series and its leveraged `proxy`.

    :param reference: Actual price series of the leveraged fund.
    :param proxy: Price series of the unleveraged proxy; leveraged with `leverage()` on each trial.
    :param factor, expense, borrow_rate, ann_periods: Passed through to `leverage()`.
    :param dict params: Maps parameters to `leverage()` to the range to search for optimal values.
    :param func errfunc: Error function that will be minimized; takes two price series and returns a distance metric between them.
    :Return: dict mapping each name in `params` to its fitted value; empty if `params` is empty.
        A warning is issued if the optimizer reports that it did not converge; the last iterate is
        still returned.
    """
    if not params:
        return {}  # Well that was easy
    reference, proxy = align(reference, proxy)
    names = tuple(params)
    bounds = list(params.values())

    def obj(x):
        param_dict = dict(zip(names, x))  # param name: value
        return errfunc(reference, leverage(proxy, factor, expense, borrow_rate=borrow_rate, ann_periods=ann_periods, **param_dict))

    # Find params x that minimize obj(x)
    x0 = tuple(map(np.mean, bounds))  # Initial guess = midpoint of bounds
    res = minimize(obj, x0, bounds=bounds)
    if not res.success:
        # Surface optimizer failures instead of silently returning a possibly poor fit.
        warnings.warn(f'Leverage parameter fit did not converge: {res.message}')
    return dict(zip(names, res.x))  # param name: optimal value
def plotbest(reference, proxy, factor=2, expense=0.01, borrow_rate=BORROW, ann_periods=TRADING_DAYS, plot=True, errfunc=rel_rmse, params=LEV_PARAMS):
    """Fit the leverage parameters that minimize the error between `reference` and the leveraged `proxy`,
    print the fitted parameters and error metrics, optionally draw a telltale chart, and return the
    leveraged proxy series built with the fitted parameters."""
    lev_kws = dict(factor=factor, expense=expense, borrow_rate=borrow_rate, ann_periods=ann_periods)
    best = find_params(reference, proxy, errfunc=errfunc, params=params, **lev_kws)
    fitted = ', '.join(f'{k}={v}' for k, v in roundvals(best, 4).items())
    print(reference.name + ':' + proxy.name, '\tparams:', fitted)

    # Leveraged series using the fitted parameters
    leveraged = leverage(proxy, **lev_kws, **best)
    ref, lev = align(reference, leveraged, norm=True)  # Might be superfluous
    error = errstats(ref, lev, ann_periods=ann_periods)
    print(', '.join(f'{k}: {v}' for k, v in roundvals(error).items()))

    # Simulated part: everything up to the first date of the real fund
    sim = leveraged[:reference.index[0]]
    simret = 0 if sim.empty else cumret(sim)
    actual = cumret(reference)
    print(f'CUMRET: sim {simret:.4f} + actual {actual:.4f} = {(simret + 1) * (actual + 1) - 1:.4f}')
    if plot:
        telltale(ref, lev)
    return leveraged
You can leverage your own ETF by changing the tickers below. Change `UPRO` to the leveraged ETF you want to extend, and `^SP500TR` to the index or fund it leverages. Change the factor and expense ratio to match the leveraged fund. Check that the telltale chart looks reasonably flat and close to 1.0. The RMSE should be less than, say, 0.03 or so. `leveraged` will be the simulated leveraged price series.
# Fetch the leveraged fund and the proxy it leverages, then fit and plot the simulated series.
letf, proxy = yget('UPRO'), yget('^SP500TR')
leveraged = plotbest(letf, proxy, factor=3, expense=0.0095, plot=True);
UPRO:^SP500TR params: expense_incr=0.0086 RMSE: 0.0075, MAE: 0.0066, RETRMSE: 0.0022, RETMAE: 0.0013, CAGR: 0.0019, VOL: 0.0051, P99: 0.0164 CUMRET: sim 1.1356 + actual 34.0091 = 73.7659
# Cumulative-return chart of the proxy and the simulated leveraged series, saved and shown as a static image.
fig = plotret(proxy.rename('S&P500'), leveraged, title='3X S&P (UPRO) Back to the 80s')
rasterize(fig, True, filename='images/upro.png')
Below we extend many popular LETFs in bulk. You can add more to the list, run the notebook, and they will be included in the output.
# Fund: (benchmark, leverage factor, expense ratio, issuer, start year for good data (or None to use all))
# Negative leverage factors denote inverse (short) funds. Entries that are commented out are kept for
# reference together with the reason they are excluded.
FUNDS = {
    # Mutual Funds
    'RYNVX': ('^SP500TR', 1.5, .0138, 'Rydex', '2000'),
    'ULPIX': ('^SP500TR', 2, .016, 'ProFunds', '2003'),
    #'RYTPX': ('^SP500TR', -2, .0184, 'Rydex', None), # Bad data
    #'UOPIX': ('QQQ', 2, .0159, 'ProFunds', None), # Bad data
    #'RYVNX': ('QQQ', -2, .0187, 'Rydex', None), # Bad data
    'UAPIX': ('IWM', 2, .0178, 'ProFunds', '2003'), # Russell 2000
    #'RYIRX': ('IWM', -2, .0191, 'Rydex', None), # Russell 2000; Bad data
    'UMPIX': ('MDY', 2, .0166, 'ProFunds', '2003'), # S&P MidCap 400
    #'UDPIX': ('DIA', 2, .0172, 'ProFunds', None), # Dow; bad data
    'UTPIX': ('IDU', 1.5, .0173, 'ProFunds', '2004'), # Utilities
    'REPIX': ('IYR', 1.5, .0178, 'ProFunds', '2010'), # Real Estate
    #'SRPIX': ('IYR', -1, .0178, 'ProFunds', None), # Real Estate; Bad Data
    #'RYEUX': ('FEZ', 1.25, .0182, 'Rydex', None), # EuroSTOXX 50; no good benchmark (data)
    'DXKLX': ('IEF', 2, .0143, 'Direxion', '2013'), # ITT
    'DXKSX': ('IEF', -2, .014, 'Direxion', '2013'), # ITT
    'UNPIX': ('EFA', 2, .0178, 'ProFunds', None), # MSCI EAFE (large - mid foreign)
    'UUPIX': ('ADRE', 2, .0178, 'ProFunds', '2009'), # Emerging Markets
    # ETFs
    'SSO': ('^SP500TR', 2, .0091, 'ProShares', '2009'), # S&P 500
    'UPRO': ('^SP500TR', 3, .0093, 'ProShares', None),
    'SPXL': ('^SP500TR', 3, .0101, 'Direxion', '2013'),
    'SH': ('^SP500TR', -1, .009, 'ProShares', '2009'),
    'SDS': ('^SP500TR', -2, .0091, 'ProShares', '2009'),
    'SPXS': ('^SP500TR', -3, .0107, 'Direxion', '2013'),
    'QLD': ('QQQ', 2, .0095, 'ProShares', '2009'), # NASDAQ 100
    'TQQQ': ('QQQ', 3, .0095, 'ProShares', None),
    'PSQ': ('QQQ', -1, .0095, 'ProShares', '2009'),
    'QID': ('QQQ', -2, .0095, 'ProShares', '2009'),
    'SQQQ': ('QQQ', -3, .0095, 'ProShares', None), # Maybe 2013?
    'MVV': ('MDY', 2, .0095, 'ProShares', '2010'), # MidCap 400
    'MYY': ('MDY', -1, .0095, 'ProShares', '2010'),
    'MZZ': ('MDY', -2, .0095, 'ProShares', '2010'),
    'UWM': ('IWM', 2, .0095, 'ProShares', '2010'), # Russell 2000
    'TNA': ('IWM', 3, .0112, 'Direxion', '2013'),
    'RWM': ('IWM', -1, .0095, 'ProShares', '2010'),
    'TWM': ('IWM', -2, .0095, 'ProShares', '2009'),
    'TZA': ('IWM', -3, .0107, 'Direxion', '2013'),
    'URE': ('IYR', 2, .0095, 'ProShares', '2010'), # Real Estate
    'REK': ('IYR', -1, .0095, 'ProShares', '2011'),
    #'SRS': ('IYR', -2, .0095, 'ProShares', '2010'), # Poor fit
    'UPW': ('IDU', 2, .0095, 'ProShares', '2009'), # Utilities
    'SDP': ('IDU', -2, .0095, 'ProShares', '2009'),
    'EFO': ('EFA', 2, .0095, 'ProShares', None), # MSCI EAFE
    'EFZ': ('EFA', -1, .0095, 'ProShares', '2010'),
    'EFU': ('EFA', -2, .0095, 'ProShares', '2010'),
    'EET': ('EEM', 2, .0095, 'ProShares', None), # Emerging Markets
    'EUM': ('EEM', -1, .0095, 'ProShares', '2009'),
    'EEV': ('EEM', -2, .0095, 'ProShares', '2011'),
    'UST': ('IEF', 2, .0095, 'ProShares', '2012'), # 7-10 Yr Treasury
    'TYD': ('IEF', 3, .0109, 'Direxion', '2010'),
    'TBX': ('IEF', -1, .0095, 'ProShares', None),
    'PST': ('IEF', -2, .0095, 'ProShares', '2011'),
    'TYO': ('IEF', -3, .0108, 'Direxion', '2011'),
    'UBT': ('TLT', 2, .0095, 'ProShares', None), # 20+ Yr Treasury
    'TMF': ('TLT', 3, .0105, 'Direxion', '2011'),
    'TBF': ('TLT', -1, .0094, 'ProShares', '2011'),
    'TBT': ('TLT', -2, .0092, 'ProShares', '2011'),
    'TMV': ('TLT', -3, .0104, 'Direxion', '2011'),
    'UGL': ('GLD', 2, .0095, 'ProShares', None), # Gold
    'GLL': ('GLD', -2, .0132, 'ProShares', None),
}
extras = ('VUSTX',) # We'll use these later
# Every ticker to download: each fund, each fund's benchmark (first tuple element), plus the extras.
tickers = frozenset(chain.from_iterable((fund, proxy) for fund, (proxy, *_) in FUNDS.items())) | frozenset(extras)
prices = yget(tickers)
# Bare expression: displays the downloaded table as the notebook cell output.
prices
UUPIX | GLL | URE | SDP | VUSTX | ULPIX | EFO | UNPIX | QID | RYNVX | ... | EEM | TLT | IDU | IEF | ADRE | REK | UWM | QLD | ^SP500TR | QQQ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
1986-05-19 | NaN | NaN | NaN | NaN | 1.014835 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1986-05-20 | NaN | NaN | NaN | NaN | 1.012803 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1986-05-21 | NaN | NaN | NaN | NaN | 1.022952 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1986-05-22 | NaN | NaN | NaN | NaN | 1.020923 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1986-05-23 | NaN | NaN | NaN | NaN | 1.022952 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-01-15 | 105.580002 | 33.540001 | 60.619999 | 18.260000 | 13.540000 | 83.589996 | 45.320000 | 18.660000 | 28.600000 | 111.379997 | ... | 54.349998 | 151.820007 | 78.059998 | 118.449997 | 59.720001 | 11.50 | 103.239998 | 113.559998 | 7789.790039 | 311.859985 |
2021-01-19 | 111.940002 | 33.040001 | 60.299999 | 18.400000 | 13.580000 | 84.919998 | 45.959999 | 18.950001 | 27.719999 | 112.739998 | ... | 55.250000 | 152.309998 | 77.690002 | 118.519997 | 61.369999 | 11.52 | 105.839996 | 116.919998 | 7853.470215 | 316.410004 |
2021-01-20 | 115.129997 | 31.950001 | 62.709999 | 18.180000 | 13.580000 | 87.300003 | 46.730000 | 19.180000 | 26.480000 | 115.089996 | ... | 56.220001 | 152.460007 | 78.169998 | 118.570000 | 62.490002 | 11.28 | 106.849998 | 122.269997 | 7962.919922 | 323.769989 |
2021-01-21 | 115.709999 | 31.990000 | 62.389999 | 18.440001 | 13.500000 | 87.379997 | 46.849998 | 19.240000 | 26.010000 | 115.150002 | ... | 56.320000 | 151.389999 | 77.660004 | 118.400002 | 62.570000 | 11.32 | 104.930000 | 124.330002 | 7966.109863 | 326.359985 |
2021-01-22 | 113.139999 | 32.500000 | 62.590000 | 18.330000 | 13.530000 | 86.830002 | 46.490002 | 19.030001 | 26.180000 | 114.620003 | ... | 55.830002 | 151.880005 | 77.739998 | 118.580002 | 61.779999 | 11.29 | 107.430000 | 123.550003 | 7942.200195 | 325.420013 |
8742 rows × 64 columns
# Plot every fund normalized to 1.0 at its own first available date.
# DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent.
rasterize(pd.concat((norm(p.dropna()) for _, p in prices.items()), axis=1).plot(title='All funds cumulative return'), RASTER, filename='images/cumulative.png')
Now for each LETF, we find the best parameters and splice the old synthetically leveraged data with the new actual data (adjusting the old data so that the most recent prices match current quotes).
Error metrics:
# Collect results from fitting and leveraging each ETF
# sim: spliced (simulated + actual) price series; err: error metrics; tells: simulated/actual ratio series.
sim, err, tells = {}, {}, {}
for name, (proxy, factor, exp, _, start) in FUNDS.items():
    # Cut out initial bad data from leveraged fund, align with proxy
    # (`proxy` is rebound here from the benchmark ticker to its price series.)
    lfund, proxy = prices.loc[start:, name].dropna(), prices[proxy].dropna()
    leveraged = plotbest(lfund, proxy, factor, exp, plot=False)
    #params = find_params(lfund.iloc[len(lfund) // 2:], proxy, factor, exp)
    #leveraged = leverage(proxy, factor, exp, **params)
    # Simulated history followed by the actual fund prices
    sim[name] = splice(leveraged, lfund)
    lfund, lev = align(prices[name].dropna(), leveraged, norm=True) # Plot whole series including bad initial data
    tells[name] = lev.div(lfund).rename(name)
    err[name] = errstats(lfund, lev)
    print()
# One column per fund; verify_integrity raises on duplicate column labels.
tells = pd.concat(tells, axis=1, verify_integrity=True)
# One row per fund, one column per error metric.
err = pd.DataFrame.from_dict(err, orient='index')
RYNVX:^SP500TR params: expense_incr=0.0027 RMSE: 0.0109, MAE: 0.0082, RETRMSE: 0.0015, RETMAE: 0.0005, CAGR: 0.0005, VOL: -0.0009, P99: 0.0318 CUMRET: sim 10.5886 + actual 2.3718 = 38.0749 ULPIX:^SP500TR params: expense_incr=0.0083 RMSE: 0.0111, MAE: 0.0077, RETRMSE: 0.0051, RETMAE: 0.0006, CAGR: 0.001, VOL: 0.0029, P99: 0.0328 CUMRET: sim 4.3333 + actual 8.4147 = 49.2107 UAPIX:IWM params: expense_incr=0.0043 RMSE: 0.0115, MAE: 0.0083, RETRMSE: 0.0064, RETMAE: 0.0027, CAGR: 0.0018, VOL: -0.0071, P99: 0.033 CUMRET: sim -0.4417 + actual 7.7951 = 3.9099 UMPIX:MDY params: expense_incr=0.0005 RMSE: 0.0207, MAE: 0.0187, RETRMSE: 0.0068, RETMAE: 0.0024, CAGR: 0.0022, VOL: 0.0048, P99: 0.0358 CUMRET: sim 1.5689 + actual 9.7770 = 26.6848 UTPIX:IDU params: expense_incr=0.002 RMSE: 0.0096, MAE: 0.0075, RETRMSE: 0.0045, RETMAE: 0.0013, CAGR: -0.0001, VOL: -0.0096, P99: 0.0185 CUMRET: sim -0.2367 + actual 4.2563 = 3.0120 REPIX:IYR params: expense_incr=-0.0027 RMSE: 0.0058, MAE: 0.0035, RETRMSE: 0.0054, RETMAE: 0.0014, CAGR: 0.0006, VOL: -0.0052, P99: 0.0133 CUMRET: sim 0.5104 + actual 2.2129 = 3.8528 DXKLX:IEF params: expense_incr=0.0036 RMSE: 0.0043, MAE: 0.0035, RETRMSE: 0.0007, RETMAE: 0.0002, CAGR: 0.0006, VOL: -0.0004, P99: 0.0096 CUMRET: sim 1.5228 + actual 0.3031 = 2.2876 DXKSX:IEF params: expense_incr=0.0156 RMSE: 0.0115, MAE: 0.0067, RETRMSE: 0.0016, RETMAE: 0.0004, CAGR: -0.0096, VOL: -0.0031, P99: 0.0793 CUMRET: sim -0.7174 + actual -0.4213 = -0.8364 UNPIX:EFA params: expense_incr=0.0181 RMSE: 0.0084, MAE: 0.0056, RETRMSE: 0.0068, RETMAE: 0.0023, CAGR: 0.0005, VOL: -0.0186, P99: 0.0291 CUMRET: sim 0.8988 + actual -0.3432 = 0.2471 UUPIX:ADRE params: expense_incr=-0.0002 RMSE: 0.0071, MAE: 0.0052, RETRMSE: 0.007, RETMAE: 0.0035, CAGR: -0.001, VOL: 0.0017, P99: 0.0174 CUMRET: sim 1.1883 + actual 1.9681 = 5.4951 SSO:^SP500TR params: expense_incr=0.003 RMSE: 0.0065, MAE: 0.0059, RETRMSE: 0.0014, RETMAE: 0.0009, CAGR: 0.0013, VOL: 0.0041, P99: 0.0118 CUMRET: sim 4.1096 + 
actual 13.7689 = 74.4628 UPRO:^SP500TR params: expense_incr=0.0088 RMSE: 0.0075, MAE: 0.0066, RETRMSE: 0.0022, RETMAE: 0.0013, CAGR: 0.0019, VOL: 0.0051, P99: 0.0164 CUMRET: sim 1.1356 + actual 34.0091 = 73.7659 SPXL:^SP500TR params: expense_incr=0.0131 RMSE: 0.0023, MAE: 0.0017, RETRMSE: 0.0019, RETMAE: 0.0012, CAGR: 0.0007, VOL: 0.008, P99: 0.0066 CUMRET: sim 5.1305 + actual 9.4312 = 62.9482 SH:^SP500TR params: expense_incr=0.0002 RMSE: 0.0044, MAE: 0.0039, RETRMSE: 0.0008, RETMAE: 0.0005, CAGR: -0.0005, VOL: 0.0009, P99: 0.0082 CUMRET: sim -0.4859 + actual -0.8712 = -0.9338 SDS:^SP500TR params: expense_incr=0.0004 RMSE: 0.0101, MAE: 0.0091, RETRMSE: 0.0014, RETMAE: 0.0009, CAGR: -0.0008, VOL: 0.0046, P99: 0.0162 CUMRET: sim -0.9387 + actual -0.9886 = -0.9993 SPXS:^SP500TR params: expense_incr=0.0049 RMSE: 0.0116, MAE: 0.0104, RETRMSE: 0.0021, RETMAE: 0.0013, CAGR: -0.0014, VOL: 0.0069, P99: 0.0196 CUMRET: sim -0.9998 + actual -0.9905 = -1.0000 QLD:QQQ params: expense_incr=0.0016 RMSE: 0.0036, MAE: 0.0029, RETRMSE: 0.0014, RETMAE: 0.0008, CAGR: 0.0007, VOL: 0.0021, P99: 0.0082 CUMRET: sim -0.9257 + actual 67.6139 = 4.0963 TQQQ:QQQ params: expense_incr=0.0041 RMSE: 0.0077, MAE: 0.0056, RETRMSE: 0.0021, RETMAE: 0.0011, CAGR: 0.003, VOL: 0.0093, P99: 0.0234 CUMRET: sim -0.9936 + actual 115.4853 = -0.2511 PSQ:QQQ params: expense_incr=0.004 RMSE: 0.0063, MAE: 0.0056, RETRMSE: 0.0008, RETMAE: 0.0005, CAGR: -0.0005, VOL: 0.0007, P99: 0.0099 CUMRET: sim -0.1830 + actual -0.9493 = -0.9586 QID:QQQ params: expense_incr=0.0091 RMSE: 0.0168, MAE: 0.0151, RETRMSE: 0.0013, RETMAE: 0.0008, CAGR: -0.0005, VOL: 0.0024, P99: 0.026 CUMRET: sim -0.8516 + actual -0.9984 = -0.9998 SQQQ:QQQ params: expense_incr=0.0095 RMSE: 0.0147, MAE: 0.0124, RETRMSE: 0.0023, RETMAE: 0.0012, CAGR: 0.0004, VOL: 0.0077, P99: 0.0274 CUMRET: sim -0.9983 + actual -0.9999 = -1.0000 MVV:MDY params: expense_incr=0.0 RMSE: 0.0035, MAE: 0.003, RETRMSE: 0.0016, RETMAE: 0.001, CAGR: 0.0003, VOL: 0.001, P99: 
0.0069 CUMRET: sim 3.4818 + actual 6.7478 = 33.7239 MYY:MDY params: expense_incr=0.005 RMSE: 0.0023, MAE: 0.0017, RETRMSE: 0.0018, RETMAE: 0.0012, CAGR: -0.0003, VOL: -0.0016, P99: 0.006 CUMRET: sim -0.7476 + actual -0.8396 = -0.9595 MZZ:MDY params: expense_incr=0.0102 RMSE: 0.0071, MAE: 0.0057, RETRMSE: 0.0043, RETMAE: 0.0024, CAGR: -0.0009, VOL: 0.0082, P99: 0.0171 CUMRET: sim -0.9805 + actual -0.9829 = -0.9997 UWM:IWM params: expense_incr=-0.0 RMSE: 0.0073, MAE: 0.0054, RETRMSE: 0.0013, RETMAE: 0.0009, CAGR: 0.0023, VOL: 0.0008, P99: 0.0222 CUMRET: sim -0.1270 + actual 6.3601 = 5.4254 TNA:IWM params: expense_incr=0.0078 RMSE: 0.0185, MAE: 0.0138, RETRMSE: 0.0022, RETMAE: 0.0011, CAGR: 0.007, VOL: 0.0092, P99: 0.0495 CUMRET: sim -0.6340 + actual 4.1266 = 0.8762 RWM:IWM params: expense_incr=0.0097 RMSE: 0.0076, MAE: 0.0062, RETRMSE: 0.0009, RETMAE: 0.0006, CAGR: -0.0002, VOL: -0.0008, P99: 0.0153 CUMRET: sim -0.5357 + actual -0.8627 = -0.9363 TWM:IWM params: expense_incr=0.0278 RMSE: 0.034, MAE: 0.0299, RETRMSE: 0.0018, RETMAE: 0.001, CAGR: -0.0009, VOL: 0.0034, P99: 0.0537 CUMRET: sim -0.7907 + actual -0.9957 = -0.9991 TZA:IWM params: expense_incr=0.0328 RMSE: 0.0235, MAE: 0.0191, RETRMSE: 0.0023, RETMAE: 0.0012, CAGR: 0.0016, VOL: 0.0038, P99: 0.0592 CUMRET: sim -0.9991 + actual -0.9947 = -1.0000 URE:IYR params: expense_incr=-0.0023 RMSE: 0.0066, MAE: 0.0047, RETRMSE: 0.0019, RETMAE: 0.0012, CAGR: 0.0025, VOL: -0.0001, P99: 0.0248 CUMRET: sim 0.0215 + actual 3.1371 = 3.2260 REK:IYR params: expense_incr=0.009 RMSE: 0.0048, MAE: 0.0041, RETRMSE: 0.0026, RETMAE: 0.0018, CAGR: -0.0007, VOL: -0.0001, P99: 0.0109 CUMRET: sim -0.8353 + actual -0.7075 = -0.9518 UPW:IDU params: expense_incr=-0.0006 RMSE: 0.0085, MAE: 0.0068, RETRMSE: 0.0072, RETMAE: 0.0046, CAGR: 0.0013, VOL: 0.0069, P99: 0.0221 CUMRET: sim -0.1332 + actual 5.1907 = 4.3659 SDP:IDU params: expense_incr=0.0142 RMSE: 0.0085, MAE: 0.0069, RETRMSE: 0.0075, RETMAE: 0.005, CAGR: 0.0003, VOL: 0.0032, P99: 0.0229 
CUMRET: sim -0.6933 + actual -0.9737 = -0.9919 EFO:EFA params: expense_incr=0.0057 RMSE: 0.0179, MAE: 0.0125, RETRMSE: 0.0169, RETMAE: 0.0086, CAGR: 0.0013, VOL: -0.0354, P99: 0.05 CUMRET: sim -0.2502 + actual 1.4661 = 0.8489 EFZ:EFA params: expense_incr=0.0044 RMSE: 0.0027, MAE: 0.0023, RETRMSE: 0.001, RETMAE: 0.0007, CAGR: -0.0003, VOL: -0.0002, P99: 0.0052 CUMRET: sim -0.4973 + actual -0.6457 = -0.8219 EFU:EFA params: expense_incr=0.0121 RMSE: 0.0104, MAE: 0.0088, RETRMSE: 0.0056, RETMAE: 0.0029, CAGR: -0.0008, VOL: 0.0053, P99: 0.0213 CUMRET: sim -0.8723 + actual -0.9159 = -0.9893 EET:EEM params: expense_incr=0.0029 RMSE: 0.0051, MAE: 0.0038, RETRMSE: 0.0049, RETMAE: 0.0031, CAGR: 0.0, VOL: 0.0058, P99: 0.0166 CUMRET: sim 2.3858 + actual 0.9645 = 5.6516 EUM:EEM params: expense_incr=0.0103 RMSE: 0.0037, MAE: 0.0029, RETRMSE: 0.0012, RETMAE: 0.0007, CAGR: -0.0006, VOL: 0.0008, P99: 0.0082 CUMRET: sim -0.7730 + actual -0.8369 = -0.9630 EEV:EEM params: expense_incr=0.0193 RMSE: 0.0057, MAE: 0.0045, RETRMSE: 0.0014, RETMAE: 0.0011, CAGR: 0.0, VOL: 0.0015, P99: 0.0133 CUMRET: sim -0.9973 + actual -0.8999 = -0.9997 UST:IEF params: expense_incr=-0.0031 RMSE: 0.0027, MAE: 0.0024, RETRMSE: 0.0012, RETMAE: 0.0008, CAGR: 0.0004, VOL: 0.0025, P99: 0.0052 CUMRET: sim 1.6662 + actual 0.5268 = 3.0706 TYD:IEF params: expense_incr=-0.0041 RMSE: 0.0122, MAE: 0.0093, RETRMSE: 0.0078, RETMAE: 0.0041, CAGR: -0.0002, VOL: -0.0221, P99: 0.0292 CUMRET: sim 1.0253 + actual 2.3705 = 5.8264 TBX:IEF params: expense_incr=0.0057 RMSE: 0.0022, MAE: 0.0016, RETRMSE: 0.0022, RETMAE: 0.0013, CAGR: -0.0003, VOL: -0.0063, P99: 0.0072 CUMRET: sim -0.2608 + actual -0.3840 = -0.5447 PST:IEF params: expense_incr=0.0111 RMSE: 0.006, MAE: 0.005, RETRMSE: 0.0012, RETMAE: 0.0008, CAGR: -0.0013, VOL: 0.0016, P99: 0.0134 CUMRET: sim -0.5258 + actual -0.6195 = -0.8196 TYO:IEF params: expense_incr=0.0379 RMSE: 0.0089, MAE: 0.0061, RETRMSE: 0.004, RETMAE: 0.0026, CAGR: -0.0041, VOL: 0.0002, P99: 0.0297 CUMRET: 
sim -0.7607 + actual -0.8147 = -0.9557 UBT:TLT params: expense_incr=-0.0063 RMSE: 0.0089, MAE: 0.0079, RETRMSE: 0.0036, RETMAE: 0.0018, CAGR: 0.0018, VOL: 0.005, P99: 0.0164 CUMRET: sim 0.8182 + actual 2.4924 = 5.3499 TMF:TLT params: expense_incr=-0.0004 RMSE: 0.0048, MAE: 0.0036, RETRMSE: 0.0019, RETMAE: 0.0012, CAGR: -0.0007, VOL: 0.0069, P99: 0.0116 CUMRET: sim 0.9309 + actual 3.0203 = 6.7628 TBF:TLT params: expense_incr=0.005 RMSE: 0.0031, MAE: 0.0025, RETRMSE: 0.0017, RETMAE: 0.0006, CAGR: -0.0006, VOL: 0.0041, P99: 0.0061 CUMRET: sim -0.3379 + actual -0.6210 = -0.7490 TBT:TLT params: expense_incr=0.0081 RMSE: 0.0054, MAE: 0.0033, RETRMSE: 0.0031, RETMAE: 0.001, CAGR: -0.0006, VOL: 0.0115, P99: 0.0111 CUMRET: sim -0.6536 + actual -0.8782 = -0.9578 TMV:TLT params: expense_incr=0.0249 RMSE: 0.0114, MAE: 0.0086, RETRMSE: 0.0015, RETMAE: 0.0011, CAGR: -0.0026, VOL: 0.0044, P99: 0.0338 CUMRET: sim -0.8622 + actual -0.9721 = -0.9962 UGL:GLD params: expense_incr=0.0126 RMSE: 0.0223, MAE: 0.02, RETRMSE: 0.0019, RETMAE: 0.0011, CAGR: 0.0028, VOL: -0.0019, P99: 0.0387 CUMRET: sim 0.8761 + actual 1.5931 = 3.8649 GLL:GLD params: expense_incr=0.0202 RMSE: 0.0347, MAE: 0.0301, RETRMSE: 0.002, RETMAE: 0.0012, CAGR: -0.0058, VOL: -0.0018, P99: 0.0807 CUMRET: sim -0.7492 + actual -0.9364 = -0.9840
# Summarize the in-sample errors; CAGR and VOL differences can be negative, so use their magnitudes
err.assign(CAGR=err['CAGR'].abs(), VOL=err['VOL'].abs()).describe()
RMSE | MAE | RETRMSE | RETMAE | CAGR | VOL | P99 | |
---|---|---|---|---|---|---|---|
count | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 |
mean | 0.059082 | 0.055613 | 0.004056 | 0.001959 | 0.003953 | 0.007012 | 0.082770 |
std | 0.060218 | 0.057672 | 0.002794 | 0.001371 | 0.003434 | 0.007434 | 0.072317 |
min | 0.002193 | 0.001624 | 0.001318 | 0.000651 | 0.000007 | 0.000145 | 0.007191 |
25% | 0.015992 | 0.012469 | 0.002244 | 0.001185 | 0.001375 | 0.001951 | 0.028229 |
50% | 0.030250 | 0.027716 | 0.002899 | 0.001452 | 0.002789 | 0.005022 | 0.056955 |
75% | 0.100916 | 0.097069 | 0.005012 | 0.002401 | 0.006293 | 0.009315 | 0.126619 |
max | 0.240724 | 0.221112 | 0.016876 | 0.008556 | 0.013838 | 0.035371 | 0.308155 |
# One translucent color per fund, derived from its leverage factor (second field of each FUNDS entry)
lev_colors = {name: color_leverage(spec[1], alpha=0.2) for name, spec in FUNDS.items()}
fig = tells.plot(
    color_discrete_map=lev_colors,
    title='Telltale, Simulated vs. Actual Leveraged ETFs<br>In-sample fit',
    render_mode='webgl',
)
fig = fig.update_layout(
    yaxis_title='Simulated / Actual',
    legend_title_text='Green = long<br>Red = short',
)
rasterize(fig, RASTER, filename='images/telltales.png')
TMF's proxy TLT only goes back to 2002, so we use VUSTX before that. It's not an exact proxy, but better than nothing. ¯_(ツ)_/¯
# Re-fit TMF against VUSTX, reusing TMF's own factor, exp and start from FUNDS
_, factor, exp, _, start = FUNDS['TMF']
tmf_actual = prices.loc[start:, 'TMF'].dropna()
# The trailing semicolon keeps the notebook from echoing the returned value
sim_vustx = plotbest(tmf_actual, prices['VUSTX'], factor, exp);
TMF:VUSTX params: expense_incr=-0.0048 RMSE: 0.0213, MAE: 0.0158, RETRMSE: 0.0052, RETMAE: 0.0027, CAGR: 0.004, VOL: -0.036, P99: 0.058 CUMRET: sim 12.8656 + actual 3.0203 = 54.7434
Splice VUSTX + TLT + TMF
# Combine the VUSTX-based simulation with the existing (TLT-based + actual) TMF series
sim['TMF'] = splice(sim_vustx, sim['TMF'])
# Unleveraged counterpart for comparison: VUSTX spliced with TLT
bond_proxy = splice(prices['VUSTX'].dropna(), prices['TLT'].dropna())
plotret(cat(bond_proxy, sim['TMF']), title='The Great Bond Bull Run in One Figure')
def is_mutual_fund(ticker):
    """Return True when `ticker` is exactly five characters long and ends with 'X'."""
    if len(ticker) != 5:
        return False
    return ticker.endswith('X')
filename = 'extended-leveraged-etfs.csv'
# Write one column per simulated series, leaving out tickers that is_mutual_fund() flags
etf_series = [data for name, data in sim.items() if not is_mutual_fund(name)]
combined = pd.concat(etf_series, axis=1, verify_integrity=True)
combined.to_csv(filename, float_format='%.5f')
!du -h $filename; echo
!head -3 $filename; echo; tail -2 $filename
2.4M extended-leveraged-etfs.csv Date,SSO,UPRO,SPXL,SH,SDS,SPXS,QLD,TQQQ,PSQ,QID,SQQQ,MVV,MYY,MZZ,UWM,TNA,RWM,TWM,TZA,URE,REK,UPW,SDP,EFO,EFZ,EFU,EET,EUM,EEV,UST,TYD,TBX,PST,TYO,UBT,TMF,TBF,TBT,TMV,UGL,GLL 1986-05-19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.66995,,,,, 1986-05-20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.66555,,,,, 2021-01-21,96.32000,83.05000,78.06000,17.46000,11.70000,35.89000,124.33000,101.44000,13.80000,26.01000,13.28000,53.80000,27.58000,11.50000,104.93000,83.45000,23.35000,16.85000,5.28000,62.39000,11.32000,56.77000,18.44000,46.85000,19.66000,12.06000,113.98000,11.79000,15.01000,71.78000,57.06000,24.20000,15.86000,8.32000,56.86000,30.98000,16.39000,17.78000,60.72000,65.39000,31.99000 2021-01-22,95.62000,82.21000,77.23000,17.52000,11.79000,36.27000,123.55000,100.52000,13.86000,26.18000,13.40000,54.16000,27.49000,11.43000,107.43000,86.29000,23.09000,16.46000,5.09000,62.59000,11.29000,57.04000,18.33000,46.49000,19.74000,12.16000,111.82000,11.90000,15.33000,71.97000,57.32000,24.16000,15.81000,8.41000,57.19000,31.20000,16.34000,17.68000,60.18000,64.36000,32.50000
# Offer the CSV written above as a download link in the rendered notebook
download_link = HTML(f'<h3><a href="{filename}" download>Download CSV</a></h3>')
display(download_link)
# Deliberate failure: halts a "Run All" here so the optional experiments below do not execute
assert False, "The note-buck stops here"
Experiments to find the (sub)set of curve-fitting parameters that minimizes the out-of-sample prediction error.
This isn't necessary to use the leveraging machinery above.
def oos_error(funds, prices, param_ranges):
    """Measure how well leverage parameters fitted on recent data predict older data.

    For every fund in `funds`, the parameters are fitted on the last half of the
    fund's history (from its `start` onward), then used to leverage the whole
    proxy. The error is computed on the first half only, which the fit never saw.

    Returns a 3-tuple of DataFrames:
      - error statistics, one row per fund
      - fitted parameters, one row per fund
      - telltale series (simulated / actual), one column per fund
    """
    telltales, fitted, errors = {}, {}, {}
    for name, (proxy_name, factor, exp, _, start) in funds.items():
        proxy_prices = prices[proxy_name].dropna()
        # Skip the fund's initial bad data and align it with the proxy;
        # the aligned proxy is discarded because the proxy must stay unmodified
        fund, _ = align(prices.loc[start:, name], proxy_prices)
        assert len(fund) > 500, "That's not enough data!"
        half = len(fund) // 2
        # In sample: fit on the later half of the data
        fitted[name] = find_params(fund.iloc[half:], proxy_prices, factor, exp, params=param_ranges)
        # Apply the fitted parameters to the entire proxy
        simulated = leverage(proxy_prices, factor, exp, **fitted[name])
        # Out of sample: score the earlier half
        errors[name] = errstats(fund.iloc[:half], simulated)
        # The telltale spans the whole series, bad initial data included
        whole_fund, whole_sim = align(prices[name], simulated, norm=True)
        telltales[name] = whole_sim.div(whole_fund).rename(name)
    error_table = pd.DataFrame.from_dict(errors, orient='index')
    param_table = pd.DataFrame.from_dict(fitted, orient='index')
    return error_table, param_table, pd.concat(telltales, axis=1, verify_integrity=True)
def powerset(iterable):
    """Iterate over every subset of `iterable` as a tuple, smallest subsets first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    # One combinations() iterator per subset size, from 0 up to len(items)
    subsets_by_size = (combinations(items, size) for size in range(len(items) + 1))
    return chain.from_iterable(subsets_by_size)
%%time
# Out-of-sample error for every subset of the fitting parameters, summarized per subset
# by the median and interquartile range of RMSE and |CAGR| across funds.
# This gives (roughly) equal weight to each fund in the error.
result = []
for subset in powerset(ALL_LEV_PARAMS):
    combo = ' + '.join(subset) or 'none'
    print(combo)
    search_space = {p: ALL_LEV_PARAMS[p] for p in subset}
    # NOTE: this overwrites the notebook-level err / params / tells on every iteration
    err, params, tells = oos_error(FUNDS, prices, search_space)
    summ = err.eval("CAGR = abs(CAGR)\nVOL = abs(VOL)").describe()
    summ.loc['iqr', :] = summ.loc['75%', :] - summ.loc['25%', :]
    row = {'params': combo}
    for metric in ('RMSE', 'CAGR'):
        row[metric] = summ.loc['50%', metric]
        row[metric + '_iqr'] = summ.loc['iqr', metric]
    result.append(row)
    #display(summ)
    #display(params)
result = pd.DataFrame(result)
none factor_scale factor_incr borrow_scale expense_incr factor_scale + factor_incr factor_scale + borrow_scale factor_scale + expense_incr factor_incr + borrow_scale factor_incr + expense_incr borrow_scale + expense_incr factor_scale + factor_incr + borrow_scale factor_scale + factor_incr + expense_incr factor_scale + borrow_scale + expense_incr factor_incr + borrow_scale + expense_incr factor_scale + factor_incr + borrow_scale + expense_incr CPU times: user 3min 37s, sys: 0 ns, total: 3min 37s Wall time: 3min 37s
# Index by parameter combination and shade each column with a color gradient
by_combo = result.set_index('params')
by_combo.style.background_gradient()
RMSE | RMSE_iqr | CAGR | CAGR_iqr | |
---|---|---|---|---|
params | ||||
none | 0.022689 | 0.028858 | 0.005701 | 0.007168 |
factor_scale | 0.015725 | 0.021945 | 0.003687 | 0.004904 |
factor_incr | 0.023671 | 0.037646 | 0.005908 | 0.007390 |
borrow_scale | 0.023671 | 0.037646 | 0.005909 | 0.008038 |
expense_incr | 0.012307 | 0.014671 | 0.003095 | 0.003403 |
factor_scale + factor_incr | 0.017657 | 0.021819 | 0.004295 | 0.006776 |
factor_scale + borrow_scale | 0.017657 | 0.021819 | 0.004583 | 0.008098 |
factor_scale + expense_incr | 0.012738 | 0.015216 | 0.002846 | 0.002978 |
factor_incr + borrow_scale | 0.023671 | 0.037646 | 0.005907 | 0.008038 |
factor_incr + expense_incr | 0.013525 | 0.017040 | 0.003036 | 0.003946 |
borrow_scale + expense_incr | 0.013525 | 0.016626 | 0.003036 | 0.004190 |
factor_scale + factor_incr + borrow_scale | 0.017658 | 0.021817 | 0.004583 | 0.008097 |
factor_scale + factor_incr + expense_incr | 0.015097 | 0.016749 | 0.003169 | 0.005262 |
factor_scale + borrow_scale + expense_incr | 0.015101 | 0.016577 | 0.003018 | 0.005341 |
factor_incr + borrow_scale + expense_incr | 0.013525 | 0.016626 | 0.003036 | 0.004190 |
factor_scale + factor_incr + borrow_scale + expense_incr | 0.015099 | 0.016577 | 0.003017 | 0.005344 |
# Out-of-sample error statistics for the chosen set of fit parameters (LEV_PARAMS)
oos = oos_error(FUNDS, prices, LEV_PARAMS)
err, params, tells = oos
err.describe()
RMSE | MAE | RETRMSE | RETMAE | CAGR | VOL | P99 | |
---|---|---|---|---|---|---|---|
count | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 | 51.000000 |
mean | 0.017885 | 0.015895 | 0.002834 | 0.001632 | 0.000760 | -0.001060 | 0.029401 |
std | 0.016292 | 0.015347 | 0.003206 | 0.001656 | 0.004645 | 0.010417 | 0.022115 |
min | 0.002183 | 0.001781 | 0.000670 | 0.000303 | -0.013773 | -0.058847 | 0.005631 |
25% | 0.008044 | 0.006655 | 0.001351 | 0.000838 | -0.002479 | -0.000788 | 0.015516 |
50% | 0.012307 | 0.011432 | 0.001593 | 0.001096 | 0.001048 | 0.000825 | 0.021445 |
75% | 0.022715 | 0.019881 | 0.002907 | 0.001720 | 0.003587 | 0.004100 | 0.036840 |
max | 0.083637 | 0.078860 | 0.021073 | 0.010841 | 0.010733 | 0.006654 | 0.107646 |
# Distribution across funds of the parameter values fitted by oos_error() above
params.describe()
expense_incr | |
---|---|
count | 51.000000 |
mean | 0.006456 |
std | 0.008109 |
min | -0.007002 |
25% | 0.000810 |
50% | 0.005016 |
75% | 0.010139 |
max | 0.035539 |
# Telltale chart for the out-of-sample fit, colored per fund via lev_colors
fig = tells.plot(
    color_discrete_map=lev_colors,
    title='Telltale, Simulated vs. Actual Leveraged ETFs<br>Out-of-sample fit',
)
fig = fig.update_layout(
    yaxis_title='Simulated / Actual',
    legend_title_text='Green = long<br>Red = short',
)
rasterize(fig, RASTER, filename='images/telltales-oos.png')