import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
2 years of returns from a student distibution with 8 df and sd = 0.4 :
data = sp.stats.t.rvs(8, loc = 0, scale = 0.4, size = 504)
data.mean()
0.02568160764702836
data.max()
1.7226215061555732
data.min()
-1.70912886213556
We can try to estimate parameters thanks to scipy :
df_est, loc_est, scale_est = sp.stats.t.fit(data) # df location and scale
print ('Estimation of Parameters')
print ('Degree of freedom : ', df_est)
print ('Mean : ', loc_est)
print ('Standard Deviation : ', scale_est)
Estimation of Parameters Degree of freedom : 17.5264421491 Mean : 0.0217386967047 Standard Deviation : 0.437460833311
x = np.linspace (-8, 8, 504)
fig, ax = plt.subplots(1, 1);
ax.hist(data, bins = 30, normed=True, color = 'grey')
ax.plot(x, sp.stats.t.pdf(x, 3, scale = scale_est), color = 'red')
ax.plot(x, sp.stats.norm.pdf(x, scale = scale_est), color = 'green')
[<matplotlib.lines.Line2D at 0x1181cbf98>]
At first, one could just compute the 5th percentile of the observed data :
np.percentile(data, 0.05)
-1.6768066573720881
Let's try to draw 2000 samples of 504 observations from the distribution we estimated :
data_simulated = sp.stats.t.rvs(df_est, loc_est, scale_est, size = (2000,504))
np.percentile(data_simulated, 0.05, axis = 1)
array([-2.01543826, -1.3562936 , -1.31859994, ..., -1.22757805, -1.58622303, -1.80276241])
np.percentile(data_simulated, 0.05, axis = 1).mean()
-1.497438043381792
Below is the distribution of VaRs taken from our sample :
sns.distplot(np.percentile(data_simulated, 0.05, axis = 1))
//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
<matplotlib.axes._subplots.AxesSubplot at 0x114d31550>
According to my intuition, this approach could be ok if we could be certain of the parameters we estimated earlier which is not the case.
with pm.Model() as model2:
mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
df = pm.Uniform('df', 0., 10.)
returns = pm.StudentT('returns', nu=df, mu=mu, sd=sigma,
observed=data)
returns_sim = pm.StudentT('returns_sim', nu=df, mu=mu, sd=sigma, shape = 504)
pm.Deterministic('annual volatility',
returns.distribution.variance**.5 * np.sqrt(252))
pm.Deterministic('sharpe', returns.distribution.mean /
returns.distribution.variance**.5 *
np.sqrt(252))
step = pm.NUTS()
trace = pm.sample(2000, step)
100%|██████████| 2500/2500 [01:53<00:00, 24.38it/s]
pm.traceplot(trace);
len(trace['returns_sim'][0])
504
percentiles = np.percentile(trace['returns_sim'], 0.05, axis = 1)
trace['returns_sim']
array([[-0.24997734, 0.37113237, -0.44993306, ..., -0.16581804, -0.0034413 , 0.62730626], [-0.03069486, -0.32997648, 0.72632171, ..., 0.19922195, 0.02399238, -0.42302246], [-0.09759646, 0.39702952, -0.55208884, ..., -0.60195092, 0.03460164, 0.55997644], ..., [ 0.00415516, 0.6320403 , -0.14154658, ..., 0.33196563, -0.01578671, 0.47064749], [-0.05540536, 0.07131875, -0.21332403, ..., -0.49154211, 0.02466654, -0.15787506], [-0.88260183, -0.59977196, -0.88628565, ..., -0.24760705, 0.13141342, -0.74078556]])
len(percentiles)
2000
percentiles
array([-1.60787008, -1.3173338 , -1.90733657, ..., -1.42773211, -1.35546003, -1.25542944])
sns.distplot(percentiles)
//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
<matplotlib.axes._subplots.AxesSubplot at 0x121ef1240>
The Bayesian approach gives a weak probability (but not null) that our VaR is below -3. Earlier it was not the case :
sns.distplot(np.percentile(data_simulated, 0.05, axis = 1))
//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
<matplotlib.axes._subplots.AxesSubplot at 0x122207668>
percentiles.mean()
-1.731044972808309
np.percentile(data, 0.05)
-1.6768066573720881