LaTeX macros (hidden cell) $ \newcommand{\Q}{\mathcal{Q}} \newcommand{\ECov}{\boldsymbol{\Sigma}} \newcommand{\EMean}{\boldsymbol{\mu}} \newcommand{\EAlpha}{\boldsymbol{\alpha}} \newcommand{\EBeta}{\boldsymbol{\beta}} $
import sys
import os
import re
import datetime as dt
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from mosek.fusion import *
from notebook.services.config import ConfigManager
from portfolio_tools import data_download, DataReader, compute_inputs
# Report interpreter and plotting-library versions for reproducibility.
print(sys.version)
print('matplotlib: {}'.format(matplotlib.__version__))

# Notebook editor tweak: turn off automatic bracket closing in code cells.
cfg_manager = ConfigManager()
cfg_manager.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})

# Compact numeric display for numpy arrays.
np.set_printoptions(precision=5, linewidth=120, suppress=True)

# Show all rows of any DataFrame instead of truncating.
pd.set_option('display.max_rows', None)

# Default figure geometry and resolution for all plots below.
plt.rcParams.update({'figure.figsize': [12, 8], 'figure.dpi': 200})
3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0] matplotlib: 3.7.2
Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market.
# Data downloading:
# If the user has an API key for alphavantage.co, then this code part will download the data.
# The code can be modified to download from other sources. To be able to run the examples,
# and reproduce results in the cookbook, the files have to have the following format and content:
# - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock.
# - The file contains at least columns "timestamp", "adjusted_close", and "volume".
# - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18,
# - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO.
# Stock tickers forming the investment universe.
list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"]
# Factor tickers (none are used in this example).
list_factors = []
# Personal alphavantage.co API key; leave as None to skip downloading.
alphaToken = None
# All symbols whose data files are needed.
list_tickers = list_stocks + list_factors
# Download only when an API key was provided; otherwise the CSV files are
# expected to exist already in the "stock_data" folder.
if alphaToken is not None:
    data_download(list_tickers, alphaToken)
We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data.
# Investment period used to slice the daily price series.
investment_start = "2016-03-18"
investment_end = "2021-03-18"
# The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv"
dr = DataReader(folder_path="stock_data", symbol_list=list_tickers)
# Parse the CSV files into memory.
dr.read_data()
# Daily adjusted prices restricted to the investment period; the second
# return value is discarded (presumably volumes — TODO confirm in DataReader).
df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end)
Found data files: stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_CSCO.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_IWM.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_SPY.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_LMT.csv Using data files: stock_data/daily_adjusted_PM.csv stock_data/daily_adjusted_LMT.csv stock_data/daily_adjusted_MCD.csv stock_data/daily_adjusted_MMM.csv stock_data/daily_adjusted_AAPL.csv stock_data/daily_adjusted_MSFT.csv stock_data/daily_adjusted_TXN.csv stock_data/daily_adjusted_CSCO.csv
First we generate return scenario data using a Monte Carlo method, based on the moments of the historical log-returns.
# Number of Monte Carlo scenarios to draw.
T = 99999
# Mean vector and covariance matrix of the historical log-returns.
m_log, S_log = compute_inputs(df_prices, return_log=True)
# Sample T joint log-return scenarios from a fitted normal distribution.
rng = np.random.default_rng()
scenarios_log = rng.multivariate_normal(m_log, S_log, size=T)
# Convert each log-return r to a linear return: exp(r) - 1.
scenarios_lin = np.exp(scenarios_log) - 1
# All scenarios are equally likely.
p = np.full(T, 1.0 / T)
We would like to optimize the 95% EVaR of the portfolio loss distribution.
# Confidence level
alpha = 0.95

# Primal CVaR formula (used to validate the EVaR optimization results).
def CVaR(alpha, p, q):
    """Compute the CVaR at level `alpha` of a discrete loss distribution.

    Parameters
    ----------
    alpha : float
        Confidence level in (0, 1), e.g. 0.95.
    p : numpy.ndarray
        Scenario probabilities (nonnegative, summing to 1), same length as `q`.
    q : numpy.ndarray
        Scenario losses.

    Returns
    -------
    float
        The conditional value-at-risk of the distribution given by (p, q).
    """
    # Sort losses ascending and carry the probabilities along; every
    # cumulative quantity below must be taken in this sorted order.
    # (The original code took cumsum/sum over the UNSORTED p, which is
    # only correct for uniform probabilities.)
    sort_idx = np.argsort(q)
    sorted_q = q[sort_idx]
    sorted_p = p[sort_idx]
    # Smallest index whose cumulative sorted probability reaches alpha;
    # sorted_q[i_alpha] is the VaR. (Math indexing starts from 1 but numpy
    # from 0, which matters in formulas like ceil(alpha * T).)
    i_alpha = np.nonzero(np.cumsum(sorted_p) >= alpha)[0][0]
    # Weight of the VaR atom in the CVaR average: the portion of its
    # probability mass lying above the alpha threshold.
    lambda_alpha = (np.sum(sorted_p[:(i_alpha + 1)]) - alpha) / (1 - alpha)
    # CVaR = weighted VaR atom + conditional expectation of the tail losses.
    return lambda_alpha * sorted_q[i_alpha] + np.dot(sorted_p[(i_alpha + 1):], sorted_q[(i_alpha + 1):]) / (1 - alpha)
Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later.
# Models the perspective of log-sum-exp:
#     s * log(sum_i(p_i * exp(x_i / s))) <= t
def persplogsumexp(M, x, s, p, t):
    """Add conic constraints to M enforcing s*log(sum_i p_i*exp(x_i/s)) <= t."""
    size = int(x.getSize())
    # One auxiliary variable per entry of x.
    aux = M.variable(size)
    shifted = Expr.sub(x, Var.repeat(t, size))
    # Each row (aux_i, s, x_i - t) lies in the primal exponential cone,
    # i.e. aux_i >= s * exp((x_i - t) / s).
    M.constraint(Expr.hstack(aux, Var.repeat(s, size), shifted), Domain.inPExpCone())
    # The p-weighted sum of the auxiliaries must not exceed s.
    M.constraint(Expr.sub(Expr.dot(p, aux), s), Domain.lessThan(0.0))
def EfficientFrontier(N, T, m, R, p, alpha, deltas):
    """Solve the EVaR portfolio model for each risk-aversion value in `deltas`.

    Parameters
    ----------
    N : int
        Number of securities.
    T : int
        Number of return scenarios (rows of R).
    m : numpy.ndarray
        Vector of expected returns, length N.
    R : numpy.ndarray
        T-by-N matrix of linear return scenarios.
    p : numpy.ndarray
        Scenario probabilities, length T.
    alpha : float
        EVaR confidence level in (0, 1).
    deltas : iterable of float
        Risk-aversion parameter values to sweep.

    Returns
    -------
    pandas.DataFrame
        One row per delta: delta, objective, expected return, EVaR risk,
        primal CVaR, and the optimal portfolio weights.

    NOTE(review): reads the module-level `df_prices` for the weight column
    names — assumes its columns match the columns of R; confirm with callers.
    """
    with Model("Case study") as M:
        # Settings
        #M.setLogHandler(sys.stdout)

        # Variables
        # x is the fraction of holdings relative to the initial capital;
        # long-only, so it takes only nonnegative values.
        x = M.variable("x", N, Domain.greaterThan(0.0))

        # Budget constraint: fully invested.
        M.constraint('budget', Expr.sum(x), Domain.equalsTo(1.0))

        # Auxiliary variables of the EVaR epigraph formulation.
        z = M.variable("z", 1, Domain.unbounded())
        t = M.variable("t", 1, Domain.greaterThan(0.0))

        # Perspective of log-sum-exp applied to the portfolio losses -R @ x.
        persplogsumexp(M, Expr.mul(-R, x), t, p, z)

        # Objective: expected return minus delta times the EVaR term.
        # delta is a model parameter so the sweep below can re-solve the
        # same model without rebuilding it.
        delta = M.parameter()
        evar_term = Expr.sub(z, Expr.mul(t, np.log(1.0 - alpha)))
        M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, evar_term)))

        # Collect one result row per delta; the DataFrame is built once at
        # the end (repeated pd.concat inside the loop is quadratic and
        # concatenating with an empty frame is deprecated behavior).
        columns = ["delta", "obj", "return", "risk", "evar"] + df_prices.columns.tolist()
        rows = []
        for d in deltas:
            # Update the risk-aversion parameter and re-solve.
            delta.setValue(d)
            M.solve()

            # Accept only certified optimal solutions.
            solsta = M.getPrimalSolutionStatus()
            if solsta != SolutionStatus.Optimal:
                # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses.
                raise Exception("Unexpected solution status!")

            # Save results
            portfolio_return = m @ x.level()
            # EVaR value implied by the optimal (z, t).
            portfolio_risk = z.level()[0] - t.level()[0] * np.log(1.0 - alpha)
            # CVaR of the same portfolio via the primal formula (for testing).
            cvar = CVaR(alpha, p, -R @ x.level())
            rows.append([d, M.primalObjValue(), portfolio_return, portfolio_risk, cvar] + list(x.level()))

            # Check CVaR value using primal formula
            print(f"Relative difference between EVaR and CVaR (%): {(cvar - portfolio_risk) / portfolio_risk * 100}")

        return pd.DataFrame(rows, columns=columns)
Here we use the loaded daily price data to compute the corresponding yearly mean return vector (the covariance output of the estimator is not needed for this model).
# Number of securities in the universe (one column per stock).
N = len(df_prices.columns)
# Expected (yearly) return vector; the covariance output is not needed here.
m, _ = compute_inputs(df_prices)
We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{2}$, tracing out the efficient frontier point by point.
# Sweep the risk-aversion parameter over a log-spaced grid, from the most
# risk-averse value (delta = 100) down to the least (delta = 0.1).
deltas = np.logspace(start=-1, stop=2, num=20)[::-1]
df_result = EfficientFrontier(N, T, m, scenarios_lin, p, alpha, deltas)
Relative difference between EVaR and CVaR (%): -33.42378204398512 Relative difference between EVaR and CVaR (%): -33.45646972266584 Relative difference between EVaR and CVaR (%): -33.50204787771225 Relative difference between EVaR and CVaR (%): -33.56488606024007 Relative difference between EVaR and CVaR (%): -33.65252039086718 Relative difference between EVaR and CVaR (%): -33.77210072922806 Relative difference between EVaR and CVaR (%): -33.90833145658367 Relative difference between EVaR and CVaR (%): -34.06647761773668 Relative difference between EVaR and CVaR (%): -34.228864613910865 Relative difference between EVaR and CVaR (%): -34.32076597469258 Relative difference between EVaR and CVaR (%): -34.26903206118584 Relative difference between EVaR and CVaR (%): -34.0913142099972 Relative difference between EVaR and CVaR (%): -33.706820776915826 Relative difference between EVaR and CVaR (%): -32.87550621993 Relative difference between EVaR and CVaR (%): -32.13821514072266 Relative difference between EVaR and CVaR (%): -31.414840069754284 Relative difference between EVaR and CVaR (%): -29.94969115976413 Relative difference between EVaR and CVaR (%): -27.216047490936095 Relative difference between EVaR and CVaR (%): -23.044884017560292 Relative difference between EVaR and CVaR (%): -20.845617388377192
Check the results.
df_result
delta | obj | return | risk | evar | PM | LMT | MCD | MMM | AAPL | MSFT | TXN | CSCO | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100.000000 | -14.809388 | 0.347111 | 0.151565 | 0.100906 | -2.712381e-10 | 1.494921e-02 | 1.363686e-01 | 2.281447e-10 | 0.090220 | 5.360870e-01 | 2.223747e-01 | 3.692416e-08 |
1 | 69.519280 | -10.189479 | 0.347760 | 0.151573 | 0.100862 | -1.494799e-10 | 1.380761e-02 | 1.347412e-01 | 8.227581e-10 | 0.090979 | 5.380318e-01 | 2.224405e-01 | 2.026158e-08 |
2 | 48.329302 | -6.977512 | 0.348694 | 0.151589 | 0.100804 | -2.945664e-10 | 1.216126e-02 | 1.323962e-01 | -2.890110e-10 | 0.092075 | 5.408365e-01 | 2.225308e-01 | 5.209527e-10 |
3 | 33.598183 | -4.744225 | 0.350042 | 0.151623 | 0.100731 | -1.268907e-10 | 9.786780e-03 | 1.290179e-01 | -1.236874e-10 | 0.093662 | 5.448811e-01 | 2.226519e-01 | -5.141085e-11 |
4 | 23.357215 | -3.191162 | 0.351989 | 0.151694 | 0.100645 | 9.499372e-11 | 6.357014e-03 | 1.241467e-01 | 1.313828e-09 | 0.095965 | 5.507230e-01 | 2.228084e-01 | 7.211346e-08 |
5 | 16.237767 | -2.110756 | 0.354809 | 0.151841 | 0.100561 | -3.079113e-10 | 1.390277e-03 | 1.171098e-01 | -2.952811e-10 | 0.099325 | 5.591803e-01 | 2.229945e-01 | -1.388656e-10 |
6 | 11.288379 | -1.358693 | 0.358261 | 0.152099 | 0.100525 | -4.289182e-10 | 1.324001e-07 | 1.038537e-01 | -3.300871e-10 | 0.104016 | 5.700917e-01 | 2.220389e-01 | 4.122753e-09 |
7 | 7.847600 | -0.834635 | 0.363036 | 0.152616 | 0.100625 | -7.075282e-10 | 1.695273e-08 | 8.366961e-02 | -2.133383e-10 | 0.110821 | 5.854145e-01 | 2.200944e-01 | 4.496131e-09 |
8 | 5.455595 | -0.468526 | 0.369963 | 0.153693 | 0.101086 | -6.042409e-10 | 5.839406e-08 | 5.453391e-02 | 1.396929e-10 | 0.120911 | 6.076218e-01 | 2.169333e-01 | 1.057438e-08 |
9 | 3.792690 | -0.211414 | 0.380061 | 0.155952 | 0.102428 | 4.562780e-09 | 1.048043e-07 | 1.233776e-02 | 9.746963e-09 | 0.135994 | 6.399893e-01 | 2.116789e-01 | 7.122438e-08 |
10 | 2.636651 | -0.030093 | 0.384901 | 0.157394 | 0.103457 | -2.675437e-10 | 8.275791e-09 | 1.358343e-07 | 7.311167e-10 | 0.153269 | 6.565735e-01 | 1.901573e-01 | 7.955828e-09 |
11 | 1.832981 | 0.096914 | 0.388271 | 0.158953 | 0.104764 | 1.299553e-09 | 4.993040e-08 | 4.729194e-07 | 2.645259e-09 | 0.176696 | 6.686228e-01 | 1.546808e-01 | 3.657430e-08 |
12 | 1.274275 | 0.186450 | 0.393112 | 0.162180 | 0.107514 | -2.211452e-10 | 6.756127e-10 | 4.488115e-08 | 9.512924e-10 | 0.211567 | 6.840754e-01 | 1.043575e-01 | 4.037568e-09 |
13 | 0.885867 | 0.250502 | 0.400035 | 0.168798 | 0.113305 | 2.755646e-09 | -1.266690e-11 | 2.424045e-08 | 3.777747e-10 | 0.264047 | 7.022571e-01 | 3.369572e-02 | 2.769004e-09 |
14 | 0.615848 | 0.297040 | 0.404439 | 0.174391 | 0.118345 | -2.052848e-11 | -1.011557e-11 | 1.175816e-11 | -1.748460e-11 | 0.319658 | 6.803416e-01 | 8.585560e-10 | -6.242794e-12 |
15 | 0.428133 | 0.330133 | 0.406807 | 0.179091 | 0.122830 | 5.774546e-10 | 3.875773e-10 | 2.384531e-08 | 1.150304e-09 | 0.385380 | 6.146204e-01 | 2.126529e-08 | 2.074802e-09 |
16 | 0.297635 | 0.354048 | 0.410417 | 0.189392 | 0.132669 | 6.395015e-10 | 6.525280e-10 | 1.226104e-09 | 2.599322e-09 | 0.485545 | 5.144548e-01 | 3.869882e-08 | 3.423996e-09 |
17 | 0.206914 | 0.372056 | 0.415926 | 0.212022 | 0.154318 | 6.981106e-10 | 1.975800e-09 | 5.781582e-09 | 3.504131e-10 | 0.638413 | 3.615865e-01 | 1.405329e-08 | 2.153144e-09 |
18 | 0.143845 | 0.386679 | 0.424172 | 0.260645 | 0.200580 | 2.219064e-09 | 6.181270e-09 | 1.355847e-08 | 4.917751e-09 | 0.867205 | 1.327948e-01 | 1.368773e-08 | 2.201932e-09 |
19 | 0.100000 | 0.399278 | 0.428958 | 0.296801 | 0.234931 | -1.725104e-12 | 1.257411e-11 | 2.114564e-11 | -4.253230e-13 | 1.000000 | 1.343903e-08 | 1.405113e-10 | 4.463059e-12 |
Plot the efficient frontier.
# Efficient frontier: expected portfolio return as a function of EVaR risk.
frontier_ax = df_result.plot(
    x="risk",
    y="return",
    style="-o",
    xlabel="portfolio risk (EVaR)",
    ylabel="portfolio return",
    grid=True,
)
frontier_ax.legend(["return"]);
Plot the portfolio composition.
# Zero out numerically-tiny entries (solver noise) so the area plot renders;
# only the last 8 columns (the stock weights) are affected.
tiny = np.absolute(df_result) < 1e-7
tiny.iloc[:, :-8] = False
df_result[tiny] = 0

# Grayscale colormap avoiding the pure black/white extremes.
gray_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0)

# Stacked area chart of the portfolio composition versus EVaR risk.
weights = df_result.set_index('risk').iloc[:, 4:]
ax = weights.plot.area(colormap=gray_cmap, xlabel='portfolio risk (EVaR)', ylabel="x")
ax.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1)