%matplotlib inline
# importing libraries
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.offline as py
import cufflinks as cf
import seaborn as sns
import pandas as pd
import numpy as np
import quandl
import plotly
import time
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import Markdown, display
from matplotlib.ticker import FuncFormatter
from pandas.core.base import PandasObject
from datetime import datetime
pd.options.display.float_format = '{:,.2f}'.format
# Setting pandas dataframe display options
pd.set_option("display.max_rows", 20)
pd.set_option('display.width', 800)
pd.set_option('max_colwidth', 800)
# Set plotly offline
init_notebook_mode(connected=True)
# Set matplotlib style
plt.style.use('seaborn')
# Set cufflinks offline
cf.go_offline()
# Defining today's Date
from datetime import date
today = date.today()
import PortfolioAnalyser as pa
ISINs = ['IE00B18GC888', 'IWDA', 'XAD5']
Nomes = ['VG_GB', 'IWDA', 'XAD5']
fundos = pd.read_csv('D:/GDrive/_GitHub/Backtester/Data/Cotacoes_diarias.csv', index_col = 'Date', parse_dates = True)[ISINs]
fundos.columns = Nomes
fundos = fundos.dropna()
fundos = pa.normalize(fundos)
Begin = str(fundos.iloc[0].name)[0:10]
End = str(fundos.iloc[-1].name)[0:10]
C:\Users\PC\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3071: DtypeWarning: Columns (517) have mixed types.Specify dtype option on import or set low_memory=False.
Nota: Ver em baixo sobre possibilidade de substituição do Vanguard Global Bond pelo AGGH
pa.print_title('Análise desde ' + Begin + ' até ' + End)
Análise desde 2014-02-27 até 2020-11-06
fundos.iplot(title='Fundos que compõem os portfolios desde ' + Begin + ' até ' + End,
yTitle='Evolução por cada 100 €uros investidos')
nr = 0
pa.print_title('Quadro de performance de ' + fundos.columns[nr])
pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]])
Quadro de performance de VG_GB
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-6-4a115fc9699e> in <module> 2 3 pa.print_title('Quadro de performance de ' + fundos.columns[nr]) ----> 4 pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
nr = 1
pa.print_title('Quadro de performance de ' + fundos.columns[nr])
pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]])
Quadro de performance de IWDA
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-7-cf28b1a53b0c> in <module> 2 3 pa.print_title('Quadro de performance de ' + fundos.columns[nr]) ----> 4 pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
nr = 2
pa.print_title('Quadro de performance de ' + fundos.columns[nr])
pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]])
Quadro de performance de IQQ0
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-8-69ad01248c94> in <module> 2 3 pa.print_title('Quadro de performance de ' + fundos.columns[nr]) ----> 4 pa.compute_ms_performance_table(fundos[[fundos.columns[nr]]]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
conservador = pa.compute_portfolio(fundos, [0.7, 0.15, 0.15])
moderado = pa.compute_portfolio(fundos, [0.5, 0.25, 0.25])
agressivo = pa.compute_portfolio(fundos, [0.2, 0.4, 0.4])
portfolios = pa.merge_time_series(conservador, moderado)
portfolios = pa.merge_time_series(portfolios, agressivo)
portfolios.columns = ['Cons.', 'Mod.', 'Agr.']
Portfolio Conservador:
Portfolio Moderado:
Portfolio Agressivo:
portfolios.iplot(title='Fundos que compõem os portfolios desde ' + Begin + ' até ' + End,
yTitle='Evolução por cada 100 €uros investidos')
round(pa.compute_drawdowns(portfolios), 2).iplot(title='Drawdowns desde ' + Begin + ' até ' + End, yTitle='Percentagem')
pa.print_title('Quadro de performance do portfolio Conservador (70B/30E)')
pa.compute_ms_performance_table(portfolios[['Cons.']])
Quadro de performance do portfolio Conservador (70B/30E)
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-12-1fc1610f0894> in <module> 1 pa.print_title('Quadro de performance do portfolio Conservador (70B/30E)') ----> 2 pa.compute_ms_performance_table(portfolios[['Cons.']]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
pa.print_title('Quadro de performance do portfolio Moderado (50B/50E)')
pa.compute_ms_performance_table(portfolios[['Mod.']])
Quadro de performance do portfolio Moderado (50B/50E)
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-13-82a0b863dc96> in <module> 1 pa.print_title('Quadro de performance do portfolio Moderado (50B/50E)') ----> 2 pa.compute_ms_performance_table(portfolios[['Mod.']]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
pa.print_title('Quadro de performance do portfolio Agressivo (20B/80E)')
pa.compute_ms_performance_table(portfolios[['Agr.']])
Quadro de performance do portfolio Agressivo (20B/80E)
Performance from 2014-02-27 to 2020-11-06 (≈ 6.7 years)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-14-7557fecc3e62> in <module> 1 pa.print_title('Quadro de performance do portfolio Agressivo (20B/80E)') ----> 2 pa.compute_ms_performance_table(portfolios[['Agr.']]) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_ms_performance_table(DataFrame, freq) 797 elif nr_of_days >= 365*5 and nr_of_days < 365*10: 798 df0 = compute_performance_table(DataFrame) --> 799 df_ytd = compute_performance_table(DataFrame, years='ytd') 800 df1 = compute_performance_table(filter_by_date(DataFrame, years=1), freq=freq) 801 df3 = compute_performance_table(filter_by_date(DataFrame, years=3), freq=freq) D:\GDrive\_GitHub\Articles_and_studies\PortfolioAnalyser.py in compute_performance_table(dataframe, years, freq) 143 if years == 'ytd': 144 --> 145 df = filter_by_date(dataframe, 'ytd') 146 147 start = str(df.index[0])[0:10] NameError: name 'filter_by_date' is not defined
pa.print_title('Quadro de retornos anuais dos portfolios')
fundos_norm = portfolios.copy()
# Resampling to yearly (business year)
yearly_quotes_fundos = fundos_norm.resample('BA').last()
# Adding first quote (only if start is in the middle of the year) ### Falta trabalho para automatizar #####
yearly_quotes_fundos = pd.concat([yearly_quotes_fundos.iloc[:1], yearly_quotes_fundos])
yearly_quotes_fundos = pd.concat([pd.DataFrame(fundos_norm.iloc[0]).transpose(), yearly_quotes_fundos.iloc[1:]])
# Returns
yearly_returns_fundos = ((yearly_quotes_fundos / yearly_quotes_fundos.shift(1)) - 1) * 100
yearly_returns_fundos = yearly_returns_fundos.set_index([list(range(fundos_norm.index[0].year - 1, fundos_norm.index[-1].year + 1))]).drop(fundos_norm.index[0].year - 1)
# Calcular em valores percentuais
yearly_returns_fundos = yearly_returns_fundos / 100
# Sns heatmap
fig, ax = plt.subplots()
fig.set_size_inches(12, 4.5)
ax = sns.heatmap(yearly_returns_fundos.transpose(), annot=True, cmap="RdYlGn", linewidths=.2, cbar=False, center=0.02, fmt='.2%')
plt.yticks(rotation=360)
plt.title('Yearly performance')
plt.show()
Quadro de retornos anuais dos portfolios
Para ajustar os pesos ao risco (ajuste ao retorno por esta via é mais instável mas é sempre possibilidade também) que se pretende se quisermos ir mais longe na personalização. Os fundos/ETFs considerados para a optimização são o IWDA (representando global Equity) e Vanguard Global Bonds (representando Global Bonds). As carteiras acima estão construídas dividindo a parte Equity 50% em IWDA e 50% IQQ0.
from scipy.optimize import minimize as sp_minimize
# Helper function
def portfolio_return(weights, returns):
"""
Computes the return on a portfolio from constituent returns and weights
weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
"""
return weights.T @ returns
def portfolio_vol(weights, covmat):
"""
Computes the vol of a portfolio from a covariance matrix and constituent weights
weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
"""
return (weights.T @ covmat @ weights)**0.5
def optimal_weights(n_points, cagr_returns,covmat):
"""
"""
target_rs = np.linspace(cagr_returns.min(), cagr_returns.max(), n_points)
weights = [minimize_vol(target_return, cagr_returns,covmat) for target_return in target_rs]
return weights
def compute_ef(n_points, cagr_returns,covmat):
"""
Plots the multi-asset efficient frontier
"""
weights = optimal_weights(n_points, cagr_returns,covmat)
rets = [portfolio_return(w, cagr_returns) for w in weights]
vols = [portfolio_vol(w, covmat) for w in weights]
ef = pd.DataFrame({
"Returns": rets,
"Volatility": vols
})
return ef, weights
def minimize_vol(target_return, cagr_returns, covmat):
"""
Returns the optimal weights that achieve the target return
given a set of expected returns and a covariance matrix
"""
n = cagr_returns.shape[0]
init_guess = np.repeat(1/n, n)
bounds = ((0.0, 1.0),) * n
# construct the constraints
weights_sum_to_1 = {'type': 'eq',
'fun': lambda weights: np.sum(weights) - 1
}
return_is_target = {'type': 'eq',
'args': (cagr_returns,),
'fun': lambda weights, er: target_return - portfolio_return(weights,cagr_returns)
}
# minimum_weight_group = {'type': 'ineq',
# 'fun': lambda weights: np.sum([weights[4], weights[5]]) - 0.1
# }
if len(covmat) == 2:
constraints = (weights_sum_to_1, return_is_target,
minimum_weight_0, minimum_weight_1)
if len(covmat) == 3:
constraints = (weights_sum_to_1, return_is_target,
minimum_weight_0, minimum_weight_1,
minimum_weight_2)
if len(covmat) == 4:
constraints = (weights_sum_to_1, return_is_target,
minimum_weight_0, minimum_weight_1,
minimum_weight_2, minimum_weight_3)
elif len(covmat) == 5:
constraints = (weights_sum_to_1, return_is_target,
minimum_weight_0, minimum_weight_1,
minimum_weight_2, minimum_weight_3,
minimum_weight_4)
elif len(covmat) == 6:
constraints = (weights_sum_to_1, return_is_target,
minimum_weight_0, minimum_weight_1,
minimum_weight_2, minimum_weight_3,
minimum_weight_4, minimum_weight_5)
weights = sp_minimize(portfolio_vol, init_guess,
args=(covmat,), method='SLSQP',
options={'disp': False},
constraints=constraints,
bounds=bounds)
return weights.x
def compute_efficient_frontier(quotes, min_weights, nomes):
cagr_returns = pa.compute_cagr(quotes)
cagr_returns
returns = quotes.pct_change().dropna()
returns
returns_covmat = returns.cov()
returns_covmat_a = returns_covmat * 365
def ef_minimum_weight(ticker_number, min_weight):
globals()['minimum_weight_' + str(ticker_number)] = {'type': 'ineq',
'fun': lambda weights: weights[ticker_number] - min_weight
}
for i in range(len(returns.columns)):
ef_minimum_weight(ticker_number=i, min_weight=min_weights[i])
ef, weights = compute_ef(100, cagr_returns,returns_covmat_a)
weights_100 = np.array(weights) * 100
columns = ['Returns', 'Volatility'] + nomes + ['Sharpe']
ef_2 = pd.concat([ef, pd.DataFrame(weights_100)], axis=1)
ef_2['Volatility'] = ef_2['Volatility'] * 100
ef_2['Sharpe'] = ef_2['Returns'] / ef_2['Volatility']
ef_2.columns=columns
ef_2 = round(ef_2, 4)
return ef_2
nomes = ['VG', 'IWDA', 'XAD5']
ef = compute_efficient_frontier(fundos, [0, 0, 0], nomes)
import plotly.express as px
fig = px.scatter(round(ef, 2), x="Volatility", y="Returns", color='Sharpe',
title='Efficient Frontier',
color_continuous_scale='Viridis_r',
hover_data=nomes, width=990, height=500)
fig.update_layout(
paper_bgcolor="#F5F6F9",
plot_bgcolor="#F5F6F9",
title=dict(x=0.5),
yaxis=dict(
ticksuffix='%',
tickfont=dict(color='#4D5663'),
gridcolor='#E1E5ED',
zerolinecolor='#E1E5ED',
showgrid=True,
title_text="Returns (Anual Mean Geometric Return)"),
xaxis=dict(
ticksuffix='%',
tickfont=dict(color='#4D5663'),
gridcolor='#E1E5ED',
zerolinecolor='#E1E5ED',
showgrid=True,
title_text="Volatility (Anualized Standard Deviation)")
)
ISINs = ['IE00B18GC888', 'AGGH_oficial']
Nomes = ['VG_GB', 'AGGH']
fundos = pd.read_csv('C:/Users/Luis/Google Drive/_GitHub/Backtester/Data/Cotacoes_diarias_all.csv', index_col = 'Date', parse_dates = True)[ISINs]
fundos.columns = Nomes
fundos = fundos.dropna()
fundos = pa.normalize(fundos.loc[:'2020-04-30'])
Begin = str(fundos.iloc[0].name)[0:10]
End = str(fundos.iloc[-1].name)[0:10]
fundos.iplot(title = 'Vanguard Global bond index fund vs iShares Core Global Aggregate Bond UCITS ETF',
yTitle='Crescimento de cada 100 €uros investidos')
Os activos parecem ser suficientemente semelhantes para se poder usar qualquer um deles.
# Downloading funds and creating quotes and returns dataframes
Begin = '2000-03-10'
# End = '2017-08-20' # Só activas se quiseres que acabe num dia especifíco
Tickers = ['DBZB.XETRA']
ETFs = pd.DataFrame()
# Download
for ticker in Tickers:
url = "https://eodhistoricaldata.com/api/eod/" + str(ticker) + "?api_token=5c982bff80deb2.22427269&period=d."
ETF = pd.read_csv(url, index_col = 'Date', parse_dates = True)[['Adjusted_close']].iloc[:-1, :]
ETFs = ETFs.merge(ETF, left_index = True, right_index = True, how='outer')
ETFs.columns = Tickers
ETFs = ETFs.fillna(method='ffill')
ETFs = ETFs.replace(to_replace=0, method='ffill')
ETFs.columns = ['DBZB']
ISINs = ['IE00B18GC888']
Nomes = ['VG_GB']
fundos = pd.read_csv('C:/Users/Luis/Google Drive/_GitHub/Backtester/Data/Cotacoes_diarias_all.csv', index_col = 'Date', parse_dates = True)[ISINs]
fundos.columns = Nomes
fundos = fundos.dropna()
Begin = str(fundos.iloc[0].name)[0:10]
End = str(fundos.iloc[-1].name)[0:10]
fundos = pa.merge_time_series(ETFs, fundos, how='inner')
fundos = pa.normalize(fundos)
fundos.iplot(title= 'Vanguard Global Bond vs Xtrackers II Global Government Bond',
yTitle='Valor por cada 100 €uros investidos')
Tal como na comparação anterior os activos parecem ser suficientemente semelhantes para se poder usar qualquer um deles. Embora, ao contrário dos anteriores, devido ao benchmark não ser o mesmo, a probabilidade de haver desvios no futuro é maior. Pode-se ver isso pelo tracking error ser maior nesta segunda comparação. Tracking error é simplesmente haver um spread maior entre eles. Quanto mais um activo se sobrepõe ao outro menor o tracking error.
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)
# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)
# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Código ON/OFF</button>''', raw=True)