LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import datetime as dt
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader, compute_inputs
# Version checks
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))
# Jupyter configuration
c = ConfigManager()
c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})
# Numpy options
np.set_printoptions(precision=5, linewidth=120, suppress=True)
# Pandas options
pd.set_option('display.max_rows', None)
# Matplotlib options
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 200
3.6.9 (default, Jan 26 2021, 15:33:00) [GCC 8.4.0] matplotlib: 3.3.4
Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market.
# Data downloading:
# If the user has an API key for alphavantage.co, then this code part will download the data.
# The code can be modified to download from other sources. To be able to run the examples,
# and reproduce results in the cookbook, the files have to have the following format and content:
# - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock.
# - The file contains at least columns "timestamp", "adjusted_close", and "volume".
# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18,
# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.
list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"]
list_factors = []
alphaToken = None
list_tickers = list_stocks + list_factors
if alphaToken is not None:
data_download(list_tickers, alphaToken)
We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data.
investment_start = "2016-03-18"
investment_end = "2021-03-18"
# The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv"
dr = DataReader(folder_path="stock_data", symbol_list=list_tickers)
dr.read_data()
df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)
Found data files: stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_CSCO.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_IWM.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_SPY.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_LMT.csv Using data files: stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_LMT.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_CSCO.csv
Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later.
def EfficientFrontier(N, m, G, deltas):
with Model("Case study") as M:
# Settings
#M.setLogHandler(sys.stdout)
# Variables
# The variable x is the fraction of holdings in each security.
# It is restricted to be positive, which imposes the constraint of no short-selling.
x = M.variable("x", N, Domain.greaterThan(0.0))
# The variable s models the portfolio variance term in the objective.
s = M.variable("s", 1, Domain.unbounded())
# Budget constraint
M.constraint('budget', Expr.sum(x), Domain.equalsTo(1.0))
# Objective (quadratic utility version)
delta = M.parameter()
M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, s)))
# Conic constraint for the portfolio variance
M.constraint('risk', Expr.vstack(s, Expr.mul(G.transpose(), x)), Domain.inQCone())
# Create DataFrame to store the results. Last security name (the SPY) is removed.
columns = ["delta", "obj", "return", "risk"] + df_prices.columns.tolist()
df_result = pd.DataFrame(columns=columns)
for d in deltas:
# Update parameter
delta.setValue(d);
# Solve optimization
M.solve()
# Check if the solution is an optimal point
solsta = M.getPrimalSolutionStatus()
if (solsta != SolutionStatus.Optimal):
# See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.
raise Exception("Unexpected solution status!")
# Save results
portfolio_return = m @ x.level()
portfolio_risk = s.level()[0]
row = pd.Series([d, M.primalObjValue(), portfolio_return, portfolio_risk] + list(x.level()), index=columns)
df_result = pd.concat([df_result, pd.DataFrame([row])], ignore_index=True)
return df_result
Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix.
# Number of securities
N = df_prices.shape[1]
# Get optimization parameters
m_orig, S_orig = compute_inputs(df_prices, shrinkage=False)
m_shrunk, S_shrunk = compute_inputs(df_prices, shrinkage=True)
Next we compute the matrix $G$ such that $\ECov=GG^\mathsf{T}$, this is the input of the conic form of the optimization problem. Here we use Cholesky factorization.
G_orig = np.linalg.cholesky(S_orig)
G_shrunk = np.linalg.cholesky(S_shrunk)
We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{1.5}$. We compute the efficient frontier this way both with and without using shrinkage estimation.
# Compute efficient frontier with and without shrinkage
deltas = np.logspace(start=-1, stop=1.5, num=20)[::-1]
df_result_orig = EfficientFrontier(N, m_orig, G_orig, deltas)
df_result_shrunk = EfficientFrontier(N, m_shrunk, G_shrunk, deltas)
# Set small negatives to zero to make plotting work
mask = df_result_orig < 0
mask.iloc[:, :-8] = False
df_result_orig[mask] = 0
# Set small negatives to zero to make plotting work
mask = df_result_shrunk < 0
mask.iloc[:, :-8] = False
df_result_shrunk[mask] = 0
Check the results without shrinkage.
df_result_orig
delta | obj | return | risk | PM | LMT | MCD | MMM | AAPL | MSFT | TXN | CSCO | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 31.622777 | -6.271623 | 0.175479 | 0.203875 | 1.028265e-01 | 1.130694e-01 | 3.006178e-01 | 1.649833e-01 | 1.859269e-09 | 7.935826e-02 | 5.704450e-02 | 1.821003e-01 |
1 | 23.357215 | -4.586052 | 0.178755 | 0.203997 | 9.909669e-02 | 1.126088e-01 | 3.000666e-01 | 1.591047e-01 | 4.990948e-09 | 8.752508e-02 | 6.125147e-02 | 1.803467e-01 |
2 | 17.252105 | -3.340046 | 0.183200 | 0.204221 | 9.403792e-02 | 1.119876e-01 | 2.993067e-01 | 1.511269e-01 | 1.573623e-08 | 9.860880e-02 | 6.696294e-02 | 1.779691e-01 |
3 | 12.742750 | -2.418350 | 0.189246 | 0.204634 | 8.715666e-02 | 1.111420e-01 | 2.982765e-01 | 1.402759e-01 | 1.023495e-08 | 1.136858e-01 | 7.473031e-02 | 1.747329e-01 |
4 | 9.412050 | -1.735700 | 0.197502 | 0.205396 | 7.775956e-02 | 1.099847e-01 | 2.968750e-01 | 1.254593e-01 | 7.721570e-09 | 1.342709e-01 | 8.533623e-02 | 1.703143e-01 |
5 | 6.951928 | -1.228921 | 0.208856 | 0.206817 | 6.483614e-02 | 1.083978e-01 | 2.949460e-01 | 1.050807e-01 | 2.072306e-08 | 1.625818e-01 | 9.991932e-02 | 1.642383e-01 |
6 | 5.134833 | -0.851059 | 0.224685 | 0.209499 | 4.681659e-02 | 1.061779e-01 | 2.922631e-01 | 7.667409e-02 | 1.105620e-08 | 2.020499e-01 | 1.202504e-01 | 1.557680e-01 |
7 | 3.792690 | -0.566941 | 0.247845 | 0.214831 | 1.987726e-02 | 1.033740e-01 | 2.885322e-01 | 3.622298e-02 | 4.707309e-03 | 2.559452e-01 | 1.478726e-01 | 1.434684e-01 |
8 | 2.801357 | -0.349680 | 0.277674 | 0.223947 | 3.664982e-09 | 9.021648e-02 | 2.674048e-01 | 9.155908e-09 | 2.610527e-02 | 3.244756e-01 | 1.773813e-01 | 1.144165e-01 |
9 | 2.069138 | -0.181326 | 0.312261 | 0.238547 | 2.982481e-09 | 5.163382e-02 | 2.062153e-01 | 2.615126e-10 | 5.362076e-02 | 4.289136e-01 | 2.091390e-01 | 5.047755e-02 |
10 | 1.528307 | -0.045407 | 0.361694 | 0.266374 | 4.009003e-09 | 2.089985e-07 | 8.067627e-02 | 3.986740e-09 | 9.755740e-02 | 5.804044e-01 | 2.413616e-01 | 5.340562e-08 |
11 | 1.128838 | 0.065444 | 0.384750 | 0.282863 | 5.519963e-10 | 3.570274e-09 | 4.261622e-09 | 0.000000e+00 | 1.486028e-01 | 6.614851e-01 | 1.899121e-01 | 1.702059e-09 |
12 | 0.833782 | 0.150242 | 0.395189 | 0.293778 | 1.080010e-09 | 1.517056e-08 | 1.099099e-08 | 0.000000e+00 | 2.138700e-01 | 7.097808e-01 | 7.634917e-02 | 5.430428e-10 |
13 | 0.615848 | 0.215749 | 0.403111 | 0.304233 | 7.329334e-10 | 1.595340e-08 | 1.294185e-09 | 0.000000e+00 | 2.828077e-01 | 7.171919e-01 | 3.424213e-07 | 0.000000e+00 |
14 | 0.454878 | 0.265016 | 0.405375 | 0.308566 | 0.000000e+00 | 1.240693e-09 | 0.000000e+00 | 0.000000e+00 | 3.456455e-01 | 6.543544e-01 | 3.395459e-08 | 0.000000e+00 |
15 | 0.335982 | 0.302123 | 0.408638 | 0.317024 | 0.000000e+00 | 7.753166e-09 | 0.000000e+00 | 0.000000e+00 | 4.361732e-01 | 5.638266e-01 | 1.814609e-07 | 0.000000e+00 |
16 | 0.248163 | 0.330596 | 0.413612 | 0.334520 | 0.000000e+00 | 2.820040e-09 | 0.000000e+00 | 0.000000e+00 | 5.741893e-01 | 4.258107e-01 | 3.321413e-08 | 0.000000e+00 |
17 | 0.183298 | 0.353357 | 0.422248 | 0.375837 | 8.334403e-13 | 9.239579e-11 | 0.000000e+00 | 0.000000e+00 | 8.138078e-01 | 1.861922e-01 | 8.563839e-10 | 0.000000e+00 |
18 | 0.135388 | 0.372738 | 0.428958 | 0.415251 | 0.000000e+00 | 1.845514e-09 | 0.000000e+00 | 0.000000e+00 | 9.999997e-01 | 2.564263e-07 | 1.665269e-08 | 0.000000e+00 |
19 | 0.100000 | 0.387433 | 0.428958 | 0.415251 | 0.000000e+00 | 6.340395e-09 | 0.000000e+00 | 0.000000e+00 | 9.999991e-01 | 8.996339e-07 | 5.941383e-08 | 0.000000e+00 |
Plot the efficient frontier for both cases.
ax = df_result_shrunk.plot(x="risk", y="return", style="-o", xlabel="portfolio risk (std. dev.)", ylabel="portfolio return", grid=True)
df_result_orig.plot(ax=ax, x="risk", y="return", style="-o", xlabel="portfolio risk (std. dev.)", ylabel="portfolio return", grid=True)
ax.legend(["return with shrinkage", "return"]);
Plot the portfolio composition for both cases.
my_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0)
ax1 = df_result_shrunk.set_index('risk').iloc[:, 3:].plot.area(colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel="x")
ax1.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)
ax2 = df_result_orig.set_index('risk').iloc[:, 3:].plot.area(colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel="x")
ax2.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)