LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import datetime as dt
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader, compute_inputs
# Version checks: record the interpreter and matplotlib versions in the cell output
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))
# Jupyter configuration: set the code-cell editor option "autoCloseBrackets" to False
c = ConfigManager()
c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})
# Numpy options: print arrays with 5 digits of precision on lines up to 120
# characters wide, with suppress=True (no scientific notation for small values)
np.set_printoptions(precision=5, linewidth=120, suppress=True)
# Pandas options: max_rows=None removes the limit on displayed rows
pd.set_option('display.max_rows', None)
# Matplotlib options: default figure size and resolution for all later plots
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 200
3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0] matplotlib: 3.7.2
Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market.
# Data downloading:
# With an API key for alphavantage.co this cell downloads the raw data.
# Other sources can be used instead, as long as the resulting files follow the
# layout below, which is needed to run the examples and reproduce the cookbook results:
# - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock.
# - Each file contains at least the columns "timestamp", "adjusted_close", and "volume".
# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18.
# - Files exist for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.
list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"]
list_factors = []
# All symbols to work with: the stocks followed by the (here empty) factor list.
list_tickers = [*list_stocks, *list_factors]

# Set to an API key string to enable the download; None skips it.
alphaToken = None
if alphaToken is not None:
    data_download(list_tickers, alphaToken)
We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data.
# First and last date of the period taken from the price history
investment_start = "2016-03-18"
investment_end = "2021-03-18"
# The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv"
dr = DataReader(folder_path="stock_data", symbol_list=list_tickers)
dr.read_data()
# get_period returns a pair; only the first element (the prices) is kept here.
# NOTE(review): the discarded second element is presumably the volume data - confirm in portfolio_tools.
df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)
Found data files: stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_CSCO.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_IWM.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_SPY.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_LMT.csv Using data files: stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_LMT.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_CSCO.csv
Below we implement the optimization model in the Fusion API. We create it inside a function so we can call it later.
In this model, the gross exposure is kept at a constant level. This can be modeled by separating the portfolio variable into a positive and a negative part, and constraining their sum. The tradeoff is that modeling the positive and negative parts requires us to assign a binary vector to each part. Thus the problem becomes mixed integer.
# x = xp - xm
# NOTE: Uses integer variables!
def posneg(M, x, bigm_p, bigm_m=None):
    """Split x into nonnegative parts xp and xm with x = xp - xm.

    Binary indicator variables and big-M bounds ensure that, for every
    element, at most one of xp and xm is nonzero. The model therefore
    becomes mixed integer.

    Parameters:
        M: Fusion model to which the variables and constraints are added.
        x: Fusion variable (or expression) to split; the auxiliary variables
            take their shape from x.
        bigm_p: Upper bound (big-M constant) on the elements of xp.
        bigm_m: Upper bound (big-M constant) on the elements of xm.
            Defaults to bigm_p.

    Returns:
        The tuple (xp, xm, yp, ym): positive part, negative part, and the
        binary indicators of the positive and negative parts.

    NOTE(review): the auxiliary variables and constraints have fixed names
    ("_xp", "_pos-neg-part", ...), so this helper is presumably meant to be
    called once per model - confirm before reusing it on the same model.
    """
    bigm_m = bigm_p if bigm_m is None else bigm_m

    # Size the auxiliary variables from x itself rather than from a
    # module-level N, so the helper works for any model it is handed.
    shape = [int(dim) for dim in x.getShape()]

    # Positive and negative part of x
    xp = M.variable("_xp", shape, Domain.greaterThan(0.0))
    xm = M.variable("_xm", shape, Domain.greaterThan(0.0))

    # Binary variables indicating which part may be nonzero
    yp = M.variable("_yp", shape, Domain.binary())
    ym = M.variable("_ym", shape, Domain.binary())

    # Constraint assigning xp and xm to be the positive and negative part of x.
    M.constraint('_pos-neg-part', Expr.sub(x, Expr.sub(xp, xm)), Domain.equalsTo(0.0))

    # Big-M bounds: xp <= bigm_p * yp and xm <= bigm_m * ym, so a part can be
    # positive only if its indicator is 1.
    M.constraint('_ubound-p', Expr.sub(xp, Expr.mul(bigm_p, yp)), Domain.lessThan(0.0))
    M.constraint('_ubound-m', Expr.sub(xm, Expr.mul(bigm_m, ym)), Domain.lessThan(0.0))

    # yp + ym <= 1: the two indicators exclude each other, so xp and xm are
    # never both positive.
    M.constraint('_exclusion', Expr.add(yp, ym), Domain.lessThan(1.0))

    return xp, xm, yp, ym
# L <= ||x||_1 <= U
# NOTE: Uses integer variables!
def norm1_mio(M, x, bigm, domain):
    """Constrain the 1-norm of x to lie in the given domain.

    x is split with posneg (using bigm as the big-M bound for both parts),
    and the sum of all positive and negative parts is constrained to
    `domain` through the constraint named 'gross_exp'.

    Parameters:
        M: Fusion model to which the constraint is added.
        x: Fusion variable whose 1-norm is constrained.
        bigm: Big-M constant passed on to posneg.
        domain: Fusion domain for the 1-norm (e.g. an equality or a bound).
    """
    pos_part, neg_part, *_ = posneg(M, x, bigm)
    # Element-wise absolute value of x, since at most one part is nonzero.
    abs_x = Expr.add(pos_part, neg_part)
    # Gross exposure constraint: the total of |x| must lie in `domain`.
    M.constraint('gross_exp', Expr.sum(abs_x), domain)
def EfficientFrontier(N, m, G, deltas, security_names=None):
    """Solve the long-short, dollar-neutral portfolio problem for each delta.

    The model maximizes m'x - delta * s, where s bounds the portfolio
    variance x' G G' x through a rotated quadratic cone, subject to a gross
    exposure of exactly 2 (sum of |x_i|) and dollar neutrality (sum of x_i
    equal to 0). The gross exposure constraint uses binary variables, so the
    problem is mixed integer.

    Parameters:
        N: Number of securities (length of x).
        m: Vector of expected returns.
        G: Matrix with covariance = G G', e.g. a Cholesky factor.
        deltas: Iterable of risk aversion values; one problem is solved for
            each, in the given order.
        security_names: Optional list of N column labels for the portfolio
            weights. Defaults to the columns of the notebook-level
            `df_prices`.

    Returns:
        DataFrame with one row per delta and the columns "delta", "obj",
        "return", "risk" (square root of s), "g. exp." (sum of |x_i|),
        followed by one weight column per security.

    Raises:
        RuntimeError: If a solve does not end with an optimal primal solution.
    """
    if security_names is None:
        # Backward-compatible default: label the weights by the price table columns.
        security_names = df_prices.columns.tolist()
    columns = ["delta", "obj", "return", "risk", "g. exp."] + list(security_names)

    with Model("Case study") as M:
        # Settings
        #M.setLogHandler(sys.stdout)

        # Variables
        # The variable x is the fraction of holdings relative to the initial capital.
        # It is a free variable, allowing long and short positions.
        x = M.variable("x", N, Domain.unbounded())

        # The variable s models the portfolio variance term in the objective.
        s = M.variable("s", 1, Domain.unbounded())

        # Gross exposure constraint: ||x||_1 = 2, with big-M bound 2 on each part
        norm1_mio(M, x, 2.0, Domain.equalsTo(2.0))

        # Dollar neutrality constraint
        M.constraint('neutrality', Expr.sum(x), Domain.equalsTo(0.0))

        # Objective (quadratic utility version); delta is a model parameter so
        # the same model can be re-solved for every risk aversion value.
        delta = M.parameter()
        M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, s)))

        # Conic constraint for the portfolio variance:
        # (s, 0.5, G'x) in the rotated quadratic cone means s >= ||G'x||^2.
        M.constraint('risk', Expr.vstack(s, 0.5, Expr.mul(G.transpose(), x)), Domain.inRotatedQCone())

        # Collect one result row per delta and build the DataFrame once at the
        # end; growing a DataFrame with pd.concat inside the loop copies all
        # previous rows every iteration and depends on how pandas treats the
        # initial empty frame when inferring dtypes.
        rows = []
        for d in deltas:
            # Update parameter
            delta.setValue(d)

            # Solve optimization
            M.solve()

            # Check if the solution is an optimal point
            solsta = M.getPrimalSolutionStatus()
            if solsta != SolutionStatus.Optimal:
                # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.
                raise RuntimeError(
                    "Unexpected solution status! (delta={}, status={})".format(d, solsta)
                )

            # Save results (fetch the solution vector only once per solve)
            x_level = x.level()
            portfolio_return = m @ x_level
            portfolio_risk = np.sqrt(s.level()[0])
            gross_exp = np.sum(np.absolute(x_level))
            rows.append(
                [d, M.primalObjValue(), portfolio_return, portfolio_risk, gross_exp] + list(x_level)
            )

    return pd.DataFrame(rows, columns=columns)
Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix.
# Number of securities: one column of df_prices per security
N = df_prices.shape[1]
# Get optimization parameters: m is used below as the vector of expected
# returns and S as the covariance matrix
m, S = compute_inputs(df_prices)
Next we compute the matrix $G$ such that $\ECov=GG^\mathsf{T}$; this is the input of the conic form of the optimization problem. Here we use Cholesky factorization.
# Cholesky factor of the covariance matrix: G is lower triangular with S = G @ G.T
G = np.linalg.cholesky(S)
We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{1.5}$. This way we compute the efficient frontier, obtaining one optimal portfolio for each value of $\delta$.
# Compute the efficient frontier on 20 log-spaced risk aversion values,
# reversed so they run from the largest delta (10^1.5) down to the smallest (10^-1)
deltas = np.logspace(start=-1, stop=1.5, num=20)[::-1]
df_result = EfficientFrontier(N, m, G, deltas)
Check the results.
# Display the result table (one row per delta)
df_result
delta | obj | return | risk | g. exp. | PM | LMT | MCD | MMM | AAPL | MSFT | TXN | CSCO | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 31.622777 | -0.464990 | 0.062518 | 0.129156 | 2.0 | 0.173130 | 2.474678e-01 | -3.372581e-01 | -2.568931e-01 | -0.163862 | 3.121972e-01 | 2.672053e-01 | -2.419872e-01 |
1 | 23.357215 | -0.326053 | 0.070600 | 0.130315 | 2.0 | 0.159688 | 2.432988e-01 | -3.337110e-01 | -2.678514e-01 | -0.155205 | 3.249477e-01 | 2.720653e-01 | -2.432330e-01 |
2 | 17.252105 | -0.220946 | 0.081541 | 0.132414 | 2.0 | 0.141491 | 2.376549e-01 | -3.289090e-01 | -2.826883e-01 | -0.143483 | 3.422096e-01 | 2.786448e-01 | -2.449195e-01 |
3 | 12.742750 | -0.139946 | 0.096354 | 0.136176 | 2.0 | 0.116853 | 2.300138e-01 | -3.224075e-01 | -3.027758e-01 | -0.127614 | 3.655803e-01 | 2.875527e-01 | -2.472028e-01 |
4 | 9.412050 | -0.067613 | 0.169392 | 0.158685 | 2.0 | -0.185170 | 3.159332e-01 | -2.750160e-01 | -2.899102e-01 | 0.090165 | 3.390421e-01 | 2.548602e-01 | -2.499033e-01 |
5 | 6.951928 | -0.003511 | 0.185865 | 0.165048 | 2.0 | -0.208575 | 2.678201e-01 | -2.413193e-01 | -3.146996e-01 | 0.106035 | 3.684833e-01 | 2.576615e-01 | -2.354058e-01 |
6 | 5.134833 | 0.048903 | 0.208168 | 0.176116 | 2.0 | -0.240260 | 2.026776e-01 | -1.956946e-01 | -3.482634e-01 | 0.127514 | 4.083526e-01 | 2.614562e-01 | -2.157824e-01 |
7 | 3.792690 | 0.094477 | 0.238362 | 0.194775 | 2.0 | -0.283159 | 1.144903e-01 | -1.339346e-01 | -3.936950e-01 | 0.156605 | 4.623162e-01 | 2.665886e-01 | -1.892116e-01 |
8 | 2.801357 | 0.137483 | 0.277953 | 0.223928 | 2.0 | -0.338266 | -2.157196e-02 | -4.161092e-02 | -4.504126e-01 | 0.195031 | 5.335729e-01 | 2.713965e-01 | -1.481390e-01 |
9 | 2.069138 | 0.177107 | 0.298427 | 0.242143 | 2.0 | -0.415078 | 1.225415e-08 | 1.700785e-08 | -5.158707e-01 | 0.246381 | 5.503481e-01 | 2.032710e-01 | -6.905105e-02 |
10 | 1.528307 | 0.211422 | 0.315448 | 0.260895 | 2.0 | -0.476221 | -2.155736e-09 | -1.776444e-09 | -5.237788e-01 | 0.309753 | 5.874734e-01 | 1.027733e-01 | 8.620232e-10 |
11 | 1.128838 | 0.240334 | 0.326192 | 0.275787 | 2.0 | -0.523016 | -1.386673e-08 | -1.262417e-08 | -4.769844e-01 | 0.381652 | 6.183479e-01 | 9.418158e-08 | -3.164627e-09 |
12 | 0.833782 | 0.263130 | 0.328904 | 0.280867 | 2.0 | -0.559580 | -3.949152e-08 | -3.949057e-08 | -4.404201e-01 | 0.438813 | 5.611868e-01 | 9.387767e-08 | -1.070953e-08 |
13 | 0.615848 | 0.280802 | 0.332579 | 0.289955 | 2.0 | -0.609112 | -1.073465e-08 | -9.791930e-09 | -3.908876e-01 | 0.516269 | 4.837309e-01 | 1.521013e-08 | -3.706820e-09 |
14 | 0.454878 | 0.294985 | 0.337550 | 0.305899 | 2.0 | -0.676097 | -2.586948e-10 | -2.543143e-10 | -3.239028e-01 | 0.621050 | 3.789498e-01 | -5.996953e-11 | -1.062450e-10 |
15 | 0.335982 | 0.306991 | 0.344283 | 0.333161 | 2.0 | -0.766839 | -2.405226e-09 | -2.331970e-09 | -2.331608e-01 | 0.762972 | 2.370280e-01 | -8.019062e-10 | -1.050058e-09 |
16 | 0.248163 | 0.317929 | 0.353399 | 0.378062 | 2.0 | -0.889698 | -6.343712e-09 | -6.401940e-09 | -1.103019e-01 | 0.955109 | 4.489092e-02 | -1.726304e-09 | -2.656342e-09 |
17 | 0.183298 | 0.327910 | 0.356984 | 0.398270 | 2.0 | -1.000000 | -4.364673e-09 | -4.519869e-09 | -4.854710e-09 | 1.000000 | 4.363003e-08 | -1.498877e-09 | -1.937569e-09 |
18 | 0.135388 | 0.335509 | 0.356984 | 0.398270 | 2.0 | -1.000000 | -4.364673e-09 | -4.519869e-09 | -4.854710e-09 | 1.000000 | 4.363003e-08 | -1.498877e-09 | -1.937569e-09 |
19 | 0.100000 | 0.341122 | 0.356984 | 0.398270 | 2.0 | -1.000000 | -4.364673e-09 | -4.519869e-09 | -4.854710e-09 | 1.000000 | 4.363003e-08 | -1.498877e-09 | -1.937569e-09 |
Plot the efficient frontier.
# Risk-return curve: one marker per solved delta
ax = df_result.plot(x="risk", y="return", style="-o",
xlabel="portfolio risk (std. dev.)", ylabel="portfolio return", grid=True)
# Explicit legend label; the trailing semicolon suppresses the cell's text output
ax.legend(["return"]);
Plot the portfolio composition.
# Gray colormap running from dark (#111111) to light (#eeeeee), one shade per security
my_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0)
ax = plt.gca()
ax.set_xticks(df_result['risk'])
# After set_index('risk') the first four columns are "delta", "obj", "return" and
# "g. exp."; iloc[:, 4:] therefore keeps only the per-security weight columns.
df_result.set_index('risk').iloc[:, 4:].plot.bar(ax=ax, colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel="x", stacked=True, width=1.0)
# The stacked long and short weights each sum to at most 1 in absolute value
# (gross exposure 2, dollar neutral), so [-1, 1] covers the bars.
ax.set_ylim([-1, 1])
ax.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)