LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import datetime as dt
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader, compute_inputs
# Report interpreter and plotting-library versions for reproducibility.
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))

# Notebook editor tweak: turn off automatic bracket closing in code cells.
cfg_manager = ConfigManager()
cfg_manager.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})

# Compact numeric display for numpy arrays.
np.set_printoptions(precision=5, linewidth=120, suppress=True)

# Show all rows of any DataFrame instead of truncating.
pd.set_option('display.max_rows', None)

# Default figure geometry and resolution for all plots below.
plt.rcParams.update({'figure.figsize': [12, 8], 'figure.dpi': 200})
3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0] matplotlib: 3.7.2
Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market.
# Data downloading:
# If the user has an API key for alphavantage.co, then this code part will download the data.
# The code can be modified to download from other sources. To be able to run the examples,
# and reproduce results in the cookbook, the files have to have the following format and content:
# - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock.
# - The file contains at least columns "timestamp", "adjusted_close", and "volume".
# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18,
# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.
# Stock tickers forming the investment universe.
list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"]
# Factor tickers (none are used in this example).
list_factors = []
# Personal alphavantage.co API key; leave as None to skip downloading.
alphaToken = None
# All symbols whose data files are needed.
list_tickers = list_stocks + list_factors
# Download only when an API key was provided; otherwise the CSV files are
# expected to exist already in the "stock_data" folder.
if alphaToken is not None:
    data_download(list_tickers, alphaToken)
We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data.
# Investment period used to slice the daily price series.
investment_start = "2016-03-18"
investment_end = "2021-03-18"
# The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv"
dr = DataReader(folder_path="stock_data", symbol_list=list_tickers)
# Parse the CSV files into memory.
dr.read_data()
# Daily adjusted prices restricted to the investment period; the second
# return value is discarded (presumably volumes — TODO confirm in DataReader).
df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)
Found data files: stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_CSCO.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_IWM.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_SPY.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_LMT.csv Using data files: stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_LMT.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_CSCO.csv
First we generate return scenario data using a Monte Carlo method, based on the moments of the historical log-returns.
# Number of Monte Carlo scenarios to draw.
T = 99999
# Mean vector and covariance matrix of the historical log-returns.
m_log, S_log = compute_inputs(df_prices, return_log=True)
# Sample T joint log-return scenarios from a fitted normal distribution.
rng = np.random.default_rng()
scenarios_log = rng.multivariate_normal(m_log, S_log, size=T)
# Convert each log-return r to a linear return: exp(r) - 1.
scenarios_lin = np.exp(scenarios_log) - 1
# All scenarios are equally likely.
p = np.full(T, 1.0 / T)
We would like to optimize the 95% EVaR of the portfolio loss distribution.
# Confidence level
alpha = 0.95

# Primal CVaR formula (used to validate the EVaR optimization results).
def CVaR(alpha, p, q):
    """Compute the CVaR at level `alpha` of a discrete loss distribution.

    Parameters
    ----------
    alpha : float
        Confidence level in (0, 1), e.g. 0.95.
    p : numpy.ndarray
        Scenario probabilities (nonnegative, summing to 1), same length as `q`.
    q : numpy.ndarray
        Scenario losses.

    Returns
    -------
    float
        The conditional value-at-risk of the distribution given by (p, q).
    """
    # Sort losses ascending and carry the probabilities along; every
    # cumulative quantity below must be taken in this sorted order.
    # (The original code took cumsum/sum over the UNSORTED p, which is
    # only correct for uniform probabilities.)
    sort_idx = np.argsort(q)
    sorted_q = q[sort_idx]
    sorted_p = p[sort_idx]
    # Smallest index whose cumulative sorted probability reaches alpha;
    # sorted_q[i_alpha] is the VaR. (Math indexing starts from 1 but numpy
    # from 0, which matters in formulas like ceil(alpha * T).)
    i_alpha = np.nonzero(np.cumsum(sorted_p) >= alpha)[0][0]
    # Weight of the VaR atom in the CVaR average: the portion of its
    # probability mass lying above the alpha threshold.
    lambda_alpha = (np.sum(sorted_p[:(i_alpha + 1)]) - alpha) / (1 - alpha)
    # CVaR = weighted VaR atom + conditional expectation of the tail losses.
    return lambda_alpha * sorted_q[i_alpha] + np.dot(sorted_p[(i_alpha + 1):], sorted_q[(i_alpha + 1):]) / (1 - alpha)
Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later.
# Models the perspective of log-sum-exp:
#     s * log(sum_i(p_i * exp(x_i / s))) <= t
def persplogsumexp(M, x, s, p, t):
    """Add conic constraints to M enforcing s*log(sum_i p_i*exp(x_i/s)) <= t."""
    size = int(x.getSize())
    # One auxiliary variable per entry of x.
    aux = M.variable(size)
    shifted = Expr.sub(x, Var.repeat(t, size))
    # Each row (aux_i, s, x_i - t) lies in the primal exponential cone,
    # i.e. aux_i >= s * exp((x_i - t) / s).
    M.constraint(Expr.hstack(aux, Var.repeat(s, size), shifted), Domain.inPExpCone())
    # The p-weighted sum of the auxiliaries must not exceed s.
    M.constraint(Expr.sub(Expr.dot(p, aux), s), Domain.lessThan(0.0))
def EfficientFrontier(N, T, m, R, p, alpha, deltas):
    """Solve the EVaR portfolio model for each risk-aversion value in `deltas`.

    Parameters
    ----------
    N : int
        Number of securities.
    T : int
        Number of return scenarios (rows of R).
    m : numpy.ndarray
        Vector of expected returns, length N.
    R : numpy.ndarray
        T-by-N matrix of linear return scenarios.
    p : numpy.ndarray
        Scenario probabilities, length T.
    alpha : float
        EVaR confidence level in (0, 1).
    deltas : iterable of float
        Risk-aversion parameter values to sweep.

    Returns
    -------
    pandas.DataFrame
        One row per delta: delta, objective, expected return, EVaR risk,
        primal CVaR, and the optimal portfolio weights.

    NOTE(review): reads the module-level `df_prices` for the weight column
    names — assumes its columns match the columns of R; confirm with callers.
    """
    with Model("Case study") as M:
        # Settings
        #M.setLogHandler(sys.stdout)

        # Variables
        # x is the fraction of holdings relative to the initial capital;
        # long-only, so it takes only nonnegative values.
        x = M.variable("x", N, Domain.greaterThan(0.0))

        # Budget constraint: fully invested.
        M.constraint('budget', Expr.sum(x), Domain.equalsTo(1.0))

        # Auxiliary variables of the EVaR epigraph formulation.
        z = M.variable("z", 1, Domain.unbounded())
        t = M.variable("t", 1, Domain.greaterThan(0.0))

        # Perspective of log-sum-exp applied to the portfolio losses -R @ x.
        persplogsumexp(M, Expr.mul(-R, x), t, p, z)

        # Objective: expected return minus delta times the EVaR term.
        # delta is a model parameter so the sweep below can re-solve the
        # same model without rebuilding it.
        delta = M.parameter()
        evar_term = Expr.sub(z, Expr.mul(t, np.log(1.0 - alpha)))
        M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, evar_term)))

        # Collect one result row per delta; the DataFrame is built once at
        # the end (repeated pd.concat inside the loop is quadratic and
        # concatenating with an empty frame is deprecated behavior).
        columns = ["delta", "obj", "return", "risk", "evar"] + df_prices.columns.tolist()
        rows = []
        for d in deltas:
            # Update the risk-aversion parameter and re-solve.
            delta.setValue(d)
            M.solve()

            # Accept only certified optimal solutions.
            solsta = M.getPrimalSolutionStatus()
            if solsta != SolutionStatus.Optimal:
                # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.
                raise Exception("Unexpected solution status!")

            # Save results
            portfolio_return = m @ x.level()
            # EVaR value implied by the optimal (z, t).
            portfolio_risk = z.level()[0] - t.level()[0] * np.log(1.0 - alpha)
            # CVaR of the same portfolio via the primal formula (for testing).
            cvar = CVaR(alpha, p, -R @ x.level())
            rows.append([d, M.primalObjValue(), portfolio_return, portfolio_risk, cvar] + list(x.level()))

            # Check CVaR value using primal formula
            print(f"Relative difference between EVaR and CVaR (%): {(cvar - portfolio_risk) / portfolio_risk * 100}")

        return pd.DataFrame(rows, columns=columns)
Here we use the loaded daily price data to compute the corresponding yearly mean return vector (the covariance output of the estimator is not needed for this model).
# Number of securities in the universe (one column per stock).
N = len(df_prices.columns)
# Expected (yearly) return vector; the covariance output is not needed here.
m, _ = compute_inputs(df_prices)
We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{2}$, tracing out the efficient frontier point by point.
# Sweep the risk-aversion parameter over a log-spaced grid, from the most
# risk-averse value (delta = 100) down to the least (delta = 0.1).
deltas = np.logspace(start=-1, stop=2, num=20)[::-1]
df_result = EfficientFrontier(N, T, m, scenarios_lin, p, alpha, deltas)
Relative difference between EVaR and CVaR (%): -33.42378204398512 Relative difference between EVaR and CVaR (%): -33.45646972266584 Relative difference between EVaR and CVaR (%): -33.50204787771225 Relative difference between EVaR and CVaR (%): -33.56488606024007 Relative difference between EVaR and CVaR (%): -33.65252039086718 Relative difference between EVaR and CVaR (%): -33.77210072922806 Relative difference between EVaR and CVaR (%): -33.90833145658367 Relative difference between EVaR and CVaR (%): -34.06647761773668 Relative difference between EVaR and CVaR (%): -34.228864613910865 Relative difference between EVaR and CVaR (%): -34.32076597469258 Relative difference between EVaR and CVaR (%): -34.26903206118584 Relative difference between EVaR and CVaR (%): -34.0913142099972 Relative difference between EVaR and CVaR (%): -33.706820776915826 Relative difference between EVaR and CVaR (%): -32.87550621993 Relative difference between EVaR and CVaR (%): -32.13821514072266 Relative difference between EVaR and CVaR (%): -31.414840069754284 Relative difference between EVaR and CVaR (%): -29.94969115976413 Relative difference between EVaR and CVaR (%): -27.216047490936095 Relative difference between EVaR and CVaR (%): -23.044884017560292 Relative difference between EVaR and CVaR (%): -20.845617388377192
Check the results.
df_result
delta | obj | return | risk | evar | PM | LMT | MCD | MMM | AAPL | MSFT | TXN | CSCO | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100.000000 | -14.809388 | 0.347111 | 0.151565 | 0.100906 | -2.712381e-10 | 1.494921e-02 | 1.363686e-01 | 2.281447e-10 | 0.090220 | 5.360870e-01 | 2.223747e-01 | 3.692416e-08 |
1 | 69.519280 | -10.189479 | 0.347760 | 0.151573 | 0.100862 | -1.494799e-10 | 1.380761e-02 | 1.347412e-01 | 8.227581e-10 | 0.090979 | 5.380318e-01 | 2.224405e-01 | 2.026158e-08 |
2 | 48.329302 | -6.977512 | 0.348694 | 0.151589 | 0.100804 | -2.945664e-10 | 1.216126e-02 | 1.323962e-01 | -2.890110e-10 | 0.092075 | 5.408365e-01 | 2.225308e-01 | 5.209527e-10 |
3 | 33.598183 | -4.744225 | 0.350042 | 0.151623 | 0.100731 | -1.268907e-10 | 9.786780e-03 | 1.290179e-01 | -1.236874e-10 | 0.093662 | 5.448811e-01 | 2.226519e-01 | -5.141085e-11 |
4 | 23.357215 | -3.191162 | 0.351989 | 0.151694 | 0.100645 | 9.499372e-11 | 6.357014e-03 | 1.241467e-01 | 1.313828e-09 | 0.095965 | 5.507230e-01 | 2.228084e-01 | 7.211346e-08 |
5 | 16.237767 | -2.110756 | 0.354809 | 0.151841 | 0.100561 | -3.079113e-10 | 1.390277e-03 | 1.171098e-01 | -2.952811e-10 | 0.099325 | 5.591803e-01 | 2.229945e-01 | -1.388656e-10 |
6 | 11.288379 | -1.358693 | 0.358261 | 0.152099 | 0.100525 | -4.289182e-10 | 1.324001e-07 | 1.038537e-01 | -3.300871e-10 | 0.104016 | 5.700917e-01 | 2.220389e-01 | 4.122753e-09 |
7 | 7.847600 | -0.834635 | 0.363036 | 0.152616 | 0.100625 | -7.075282e-10 | 1.695273e-08 | 8.366961e-02 | -2.133383e-10 | 0.110821 | 5.854145e-01 | 2.200944e-01 | 4.496131e-09 |
8 | 5.455595 | -0.468526 | 0.369963 | 0.153693 | 0.101086 | -6.042409e-10 | 5.839406e-08 | 5.453391e-02 | 1.396929e-10 | 0.120911 | 6.076218e-01 | 2.169333e-01 | 1.057438e-08 |
9 | 3.792690 | -0.211414 | 0.380061 | 0.155952 | 0.102428 | 4.562780e-09 | 1.048043e-07 | 1.233776e-02 | 9.746963e-09 | 0.135994 | 6.399893e-01 | 2.116789e-01 | 7.122438e-08 |
10 | 2.636651 | -0.030093 | 0.384901 | 0.157394 | 0.103457 | -2.675437e-10 | 8.275791e-09 | 1.358343e-07 | 7.311167e-10 | 0.153269 | 6.565735e-01 | 1.901573e-01 | 7.955828e-09 |
11 | 1.832981 | 0.096914 | 0.388271 | 0.158953 | 0.104764 | 1.299553e-09 | 4.993040e-08 | 4.729194e-07 | 2.645259e-09 | 0.176696 | 6.686228e-01 | 1.546808e-01 | 3.657430e-08 |
12 | 1.274275 | 0.186450 | 0.393112 | 0.162180 | 0.107514 | -2.211452e-10 | 6.756127e-10 | 4.488115e-08 | 9.512924e-10 | 0.211567 | 6.840754e-01 | 1.043575e-01 | 4.037568e-09 |
13 | 0.885867 | 0.250502 | 0.400035 | 0.168798 | 0.113305 | 2.755646e-09 | -1.266690e-11 | 2.424045e-08 | 3.777747e-10 | 0.264047 | 7.022571e-01 | 3.369572e-02 | 2.769004e-09 |
14 | 0.615848 | 0.297040 | 0.404439 | 0.174391 | 0.118345 | -2.052848e-11 | -1.011557e-11 | 1.175816e-11 | -1.748460e-11 | 0.319658 | 6.803416e-01 | 8.585560e-10 | -6.242794e-12 |
15 | 0.428133 | 0.330133 | 0.406807 | 0.179091 | 0.122830 | 5.774546e-10 | 3.875773e-10 | 2.384531e-08 | 1.150304e-09 | 0.385380 | 6.146204e-01 | 2.126529e-08 | 2.074802e-09 |
16 | 0.297635 | 0.354048 | 0.410417 | 0.189392 | 0.132669 | 6.395015e-10 | 6.525280e-10 | 1.226104e-09 | 2.599322e-09 | 0.485545 | 5.144548e-01 | 3.869882e-08 | 3.423996e-09 |
17 | 0.206914 | 0.372056 | 0.415926 | 0.212022 | 0.154318 | 6.981106e-10 | 1.975800e-09 | 5.781582e-09 | 3.504131e-10 | 0.638413 | 3.615865e-01 | 1.405329e-08 | 2.153144e-09 |
18 | 0.143845 | 0.386679 | 0.424172 | 0.260645 | 0.200580 | 2.219064e-09 | 6.181270e-09 | 1.355847e-08 | 4.917751e-09 | 0.867205 | 1.327948e-01 | 1.368773e-08 | 2.201932e-09 |
19 | 0.100000 | 0.399278 | 0.428958 | 0.296801 | 0.234931 | -1.725104e-12 | 1.257411e-11 | 2.114564e-11 | -4.253230e-13 | 1.000000 | 1.343903e-08 | 1.405113e-10 | 4.463059e-12 |
Plot the efficient frontier.
# Efficient frontier: expected portfolio return as a function of EVaR risk.
frontier_ax = df_result.plot(
    x="risk",
    y="return",
    style="-o",
    xlabel="portfolio risk (EVaR)",
    ylabel="portfolio return",
    grid=True,
)
frontier_ax.legend(["return"]);
Plot the portfolio composition.
# Zero out numerically-tiny entries (solver noise) so the area plot renders;
# only the last 8 columns (the stock weights) are affected.
tiny = np.absolute(df_result) < 1e-7
tiny.iloc[:, :-8] = False
df_result[tiny] = 0

# Grayscale colormap avoiding the pure black/white extremes.
gray_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0)

# Stacked area chart of the portfolio composition versus EVaR risk.
weights = df_result.set_index('risk').iloc[:, 4:]
ax = weights.plot.area(colormap=gray_cmap, xlabel='portfolio risk (EVaR)', ylabel="x")
ax.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)