#!/usr/bin/env python # coding: utf-8 # LaTeX macros (hidden cell) # $ # \newcommand{\Q}{\mathcal{Q}} # \newcommand{\ECov}{\boldsymbol{\Sigma}} # \newcommand{\EMean}{\boldsymbol{\mu}} # \newcommand{\EAlpha}{\boldsymbol{\alpha}} # \newcommand{\EBeta}{\boldsymbol{\beta}} # $ # # Imports and configuration # In[1]: import sys import os import re import datetime as dt import numpy as np import pandas as pd get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib import matplotlib.pyplot as plt from matplotlib.colors import LinearSegmentedColormap from mosek.fusion import * from notebook.services.config import ConfigManager from portfolio_tools import data_download, DataReader, compute_inputs # In[2]: # Version checks print(sys.version) print('matplotlib: {}'.format(matplotlib.__version__)) # Jupyter configuration c = ConfigManager() c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}}) # Numpy options np.set_printoptions(precision=5, linewidth=120, suppress=True) # Pandas options pd.set_option('display.max_rows', None) # Matplotlib options plt.rcParams['figure.figsize'] = [12, 8] plt.rcParams['figure.dpi'] = 200 # # Prepare input data # Here we load the raw data that will be used to compute the optimization input variables, the vector $\EMean$ of expected returns and the covariance matrix $\ECov$. The data consists of daily stock prices of $8$ stocks from the US market. # ## Download data # In[3]: # Data downloading: # If the user has an API key for alphavantage.co, then this code part will download the data. # The code can be modified to download from other sources. To be able to run the examples, # and reproduce results in the cookbook, the files have to have the following format and content: # - File name pattern: "daily_adjusted_[TICKER].csv", where TICKER is the symbol of a stock. # - The file contains at least columns "timestamp", "adjusted_close", and "volume". # - The data is daily price/volume, covering at least the period from 2016-03-18 until 2021-03-18, # - Files are for the stocks PM, LMT, MCD, MMM, AAPL, MSFT, TXN, CSCO. list_stocks = ["PM", "LMT", "MCD", "MMM", "AAPL", "MSFT", "TXN", "CSCO"] list_factors = [] alphaToken = None list_tickers = list_stocks + list_factors if alphaToken is not None: data_download(list_tickers, alphaToken) # ## Read data # We load the daily stock price data from the downloaded CSV files. The data is adjusted for splits and dividends. Then a selected time period is taken from the data. # In[4]: investment_start = "2016-03-18" investment_end = "2021-03-18" # In[5]: # The files are in "stock_data" folder, named as "daily_adjusted_[TICKER].csv" dr = DataReader(folder_path="stock_data", symbol_list=list_tickers) dr.read_data() df_prices, _ = dr.get_period(start_date=investment_start, end_date=investment_end) # # Run the optimization # ## Define the optimization model # Below we implement the optimization model in Fusion API. We create it inside a function so we can call it later. # In[6]: # |x| <= t def absval(M, x, t): M.constraint(Expr.add(t, x), Domain.greaterThan(0.0)) M.constraint(Expr.sub(t, x), Domain.greaterThan(0.0)) # ||x||_1 <= t def norm1(M, x, t): u = M.variable(x.getShape(), Domain.unbounded()) absval(M, x, u) M.constraint(Expr.sub(Expr.sum(u), t), Domain.equalsTo(0.0)) def EfficientFrontier(N, m, G, deltas): with Model("Case study") as M: # Settings #M.setLogHandler(sys.stdout) # Variables # The variable x is the fraction of holdings relative to the initial capital. # It is a free variable, allowing long and short positions. x = M.variable("x", N, Domain.unbounded()) # The variable s models the portfolio variance term in the objective. s = M.variable("s", 1, Domain.unbounded()) # Gross exposure constraint (allows 2 times the initial capital) norm1(M, x, 2.0) # Dollar neutrality constraint M.constraint('neutrality', Expr.sum(x), Domain.equalsTo(0.0)) # Objective (quadratic utility version) delta = M.parameter() M.objective('obj', ObjectiveSense.Maximize, Expr.sub(Expr.dot(m, x), Expr.mul(delta, s))) # Conic constraint for the portfolio variance M.constraint('risk', Expr.vstack(s, 0.5, Expr.mul(G.transpose(), x)), Domain.inRotatedQCone()) # Create DataFrame to store the results. Last security name (the SPY) is removed. columns = ["delta", "obj", "return", "risk", "g. exp."] + df_prices.columns.tolist() df_result = pd.DataFrame(columns=columns) for d in deltas: # Update parameter delta.setValue(d) # Solve optimization M.solve() # Check if the solution is an optimal point solsta = M.getPrimalSolutionStatus() if (solsta != SolutionStatus.Optimal): # See https://docs.mosek.com/latest/pythonfusion/accessing-solution.html about handling solution statuses. raise Exception("Unexpected solution status!") # Save results portfolio_return = m @ x.level() portfolio_risk = np.sqrt(s.level()[0]) gross_exp = sum(np.absolute(x.level())) row = pd.Series([d, M.primalObjValue(), portfolio_return, portfolio_risk, gross_exp] + list(x.level()), index=columns) df_result = pd.concat([df_result, pd.DataFrame([row])], ignore_index=True) return df_result # ## Compute optimization input variables # Here we use the loaded daily price data to compute the corresponding yearly mean return and covariance matrix. # In[7]: # Number of securities N = df_prices.shape[1] # Get optimization parameters m, S = compute_inputs(df_prices) # Next we compute the matrix $G$ such that $\ECov=GG^\mathsf{T}$, this is the input of the conic form of the optimization problem. Here we use Cholesky factorization. # In[8]: G = np.linalg.cholesky(S) # ## Call the optimizer function # We run the optimization for a range of risk aversion parameter values: $\delta = 10^{-1},\dots,10^{1.5}$. We compute the efficient frontier this way both with and without using shrinkage estimation. # In[9]: # Compute efficient frontier with and without shrinkage deltas = np.logspace(start=-1, stop=1.5, num=20)[::-1] df_result = EfficientFrontier(N, m, G, deltas) # Check the results. # In[10]: df_result # ## Visualize the results # Plot the efficient frontier. # In[11]: ax = df_result.plot(x="risk", y="return", style="-o", xlabel="portfolio risk (std. dev.)", ylabel="portfolio return", grid=True) ax.legend(["return"]); # Plot the portfolio composition. # In[12]: # Round small values to 0 to make plotting work mask = np.absolute(df_result) < 1e-7 mask.iloc[:, :-8] = False df_result[mask] = 0 # In[13]: my_cmap = LinearSegmentedColormap.from_list("non-extreme gray", ["#111111", "#eeeeee"], N=256, gamma=1.0) ax = df_result.set_index('risk').iloc[:, 4:].plot.area(colormap=my_cmap, xlabel='portfolio risk (std. dev.)', ylabel="x") ax.grid(which='both', axis='x', linestyle=':', color='k', linewidth=1) # In[ ]: