#!/usr/bin/env python # coding: utf-8 # LaTeX macros (hidden cell) # $ # \newcommand{\Q}{\mathcal{Q}} # \newcommand{\ECov}{\boldsymbol{\Sigma}} # \newcommand{\EMean}{\boldsymbol{\mu}} # \newcommand{\EAlpha}{\boldsymbol{\alpha}} # \newcommand{\EBeta}{\boldsymbol{\beta}} # $ # # Imports and configuration # In[10]: import sys import os import re import datetime as dt import numpy as np import pandas as pd get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib import matplotlib.pyplot as plt from matplotlib.colors import LinearSegmentedColormap from mosek.fusion import * from notebook.services.config import ConfigManager from portfolio_tools import data_download, DataReader, compute_inputs # In[11]: # Version checks print(sys.version) print('matplotlib: {}'.format(matplotlib.__version__)) # Jupyter configuration c = ConfigManager() c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}}) # Numpy options np.set_printoptions(precision=5, linewidth=120, suppress=True) # Pandas options pd.set_option('display.max_rows', None) # Matplotlib options plt.rcParams['figure.figsize'] = [12, 8] plt.rcParams['figure.dpi'] = 200 # # Prepare input data # Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market. # ## Download data # In[12]: # Data downloading: # If the user has an API key for alphavantage.co, then this code part will download the data. # The code can be modified to download from other sources. To be able to run the examples, # and reproduce results in the cookbook, the files have to have the following format and content: # - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock. # - The file contains at least columns "timestamp", "adjusted_close", and "volume". # - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, # - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO. list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"] list_factors = [] alphaToken = None list_tickers = list_stocks + list_factors if alphaToken is not None: data_download(list_tickers, alphaToken) # ## Read data # We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data. # In[13]: investment_start = "2016-03-18" investment_end = "2021-03-18" # In[14]: # The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv" dr = DataReader(folder_path="stock_data", symbol_list=list_tickers) dr.read_data() df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end) # # Run the optimization # ## Define the optimization model # First we generate return scenario data using Monte Carlo method based on the moments of historical prices. # In[15]: # Number of scenarios T = 99999 # Mean and covariance of historical log-returns. m_log, S_log = compute_inputs(df_prices, return_log=True) # Generate logarithmic return scenarios assuming normal distribution scenarios_log = np.random.default_rng().multivariate_normal(m_log, S_log, T) # Convert logarithmic return scenarios to linear return scenarios scenarios_lin = np.exp(scenarios_log) - 1 # Scenario probabilities p = np.ones(T) / T # We would like to optimize the 95% EVaR of the portfolio loss distribution. # In[16]: # Confidence level alpha = 0.95 # In[17]: # Primal CVaR formula (for testing) def CVaR(alpha, p, q): # We need to be careful that math index starts from 1 but numpy starts from 0 (matters in formulas like ceil(alpha * T)) T = q.shape[0] # Starting index i_alpha = np.sort(np.nonzero(np.cumsum(p) >= alpha)[0])[0] # Weight of VaR component in CVaR lambda_alpha = (sum(p[:(i_alpha + 1)]) - alpha) / (1 - alpha) # CVaR sort_idx = np.argsort(q) sorted_q = q[sort_idx] sorted_p = p[sort_idx] cvar = lambda_alpha * sorted_q[i_alpha] + np.dot(sorted_p[(i_alpha + 1):], sorted_q[(i_alpha + 1):]) / (1 - alpha) return cvar # Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later. # In[18]: # s * log(sum_i(p_i * exp(x_i / s))) <= t def persplogsumexp(M, x, s, p, t): n = int(x.getSize()) u = M.variable(n) M.constraint(Expr.hstack(u, Var.repeat(s, n), Expr.sub(x, Var.repeat(t, n))), Domain.inPExpCone()) M.constraint(Expr.sub(Expr.dot(p, u), s), Domain.lessThan(0.0)) def EfficientFrontier(N, T, m, R, p, alpha, deltas): with Model("Case study") as M: # Settings #M.setLogHandler(sys.stdout) # Variables # The variable x is the fraction of holdings relative to the initial capital. # It is constrained to take only positive values. x = M.variable("x", N, Domain.greaterThan(0.0)) # Budget constraint M.constraint('budget', Expr.sum(x), Domain.equalsTo(1.0)) # Auxiliary variables. z = M.variable("z", 1, Domain.unbounded()) t = M.variable("t", 1, Domain.greaterThan(0.0)) # Constraint modeling perspective of log-sum-exp persplogsumexp(M, Expr.mul(-R, x), t, p, z) # Objective delta = M.parameter() evar_term = Expr.sub(z, Expr.mul(t, np.log(1.0 - alpha))) M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, evar_term))) # Create DataFrame to store the results. columns = ["delta", "obj", "return", "risk", "evar"] + df_prices.columns.tolist() df_result = pd.DataFrame(columns=columns) for d in deltas: # Update parameter delta.setValue(d); # Solve optimization M.solve() # Check if the solution is an optimal point solsta = M.getPrimalSolutionStatus() if (solsta != SolutionStatus.Optimal): # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses. raise Exception("Unexpected solution status!") # Save results portfolio_return = m @ x.level() portfolio_risk = z.level()[0] - t.level()[0] * np.log(1.0 - alpha) cvar = CVaR(alpha, p, -R @ x.level()) row = pd.Series([d, M.primalObjValue(), portfolio_return, portfolio_risk, cvar] + list(x.level()), index=columns) df_result = pd.concat([df_result, pd.DataFrame([row])], ignore_index=True) # Check CVaR value using primal formula print(f"Relative difference between EVaR and CVaR (%): {(cvar - portfolio_risk) / portfolio_risk * 100}") return df_result # ## Compute optimization input variables # Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix. # In[19]: # Number of securities N = df_prices.shape[1] # Get optimization parameters m, _ = compute_inputs(df_prices) # ## Call the optimizer function # We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{2}$. We compute the efficient frontier this way both with and without using shrinkage estimation. # In[20]: # Compute efficient frontier with and without shrinkage deltas = np.logspace(start=-1, stop=2, num=20)[::-1] df_result = EfficientFrontier(N, T, m, scenarios_lin, p, alpha, deltas) # Check the results. # In[21]: df_result # ## Visualize the results # Plot the efficient frontier. # In[22]: ax = df_result.plot(x="risk", y="return", style="-o", xlabel="portfolio risk (EVaR)", ylabel="portfolio return", grid=True) ax.legend(["return"]); # Plot the portfolio composition. # In[23]: # Round small values to 0 to make plotting work mask = np.absolute(df_result) < 1e-7 mask.iloc[:, :-8] = False df_result[mask] = 0 # In[24]: my_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0) ax = df_result.set_index('risk').iloc[:, 4:].plot.area(colormap=my_cmap, xlabel='portfolio risk (EVaR)', ylabel="x") ax.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1) # In[ ]: