In [1]:

import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

Generate toy data¶

2 years of returns from a student distibution with 8 df and sd = 0.4 :

In [2]:

data = sp.stats.t.rvs(8, loc = 0, scale = 0.4,  size = 504)

Study the returns¶

In [3]:

data.mean()

Out[3]:

0.02568160764702836

In [4]:

data.max()

Out[4]:

1.7226215061555732

In [5]:

data.min()

Out[5]:

-1.70912886213556

We can try to estimate parameters thanks to scipy :

In [6]:

df_est, loc_est, scale_est = sp.stats.t.fit(data) # df location and scale

In [7]:

print ('Estimation of Parameters')
print ('Degree of freedom : ', df_est)
print ('Mean : ', loc_est)
print ('Standard Deviation : ', scale_est)

Estimation of Parameters
Degree of freedom :  17.5264421491
Mean :  0.0217386967047
Standard Deviation :  0.437460833311

In [8]:

x = np.linspace (-8, 8, 504)

In [9]:

fig, ax = plt.subplots(1, 1);
ax.hist(data, bins = 30, normed=True, color = 'grey')
ax.plot(x, sp.stats.t.pdf(x, 3, scale = scale_est), color = 'red')
ax.plot(x, sp.stats.norm.pdf(x, scale = scale_est), color = 'green')

Out[9]:

[<matplotlib.lines.Line2D at 0x1181cbf98>]

A First VaR¶

At first, one could just compute the 5th percentile of the observed data :

In [10]:

np.percentile(data, 0.05)

Out[10]:

-1.6768066573720881

Let's try to draw 2000 samples of 504 observations from the distribution we estimated :

In [11]:

data_simulated = sp.stats.t.rvs(df_est, loc_est, scale_est, size = (2000,504))

In [12]:

np.percentile(data_simulated, 0.05, axis = 1)

Out[12]:

array([-2.01543826, -1.3562936 , -1.31859994, ..., -1.22757805,
       -1.58622303, -1.80276241])

In [13]:

np.percentile(data_simulated, 0.05, axis = 1).mean()

Out[13]:

-1.497438043381792

Below is the distribution of VaRs taken from our sample :

In [14]:

sns.distplot(np.percentile(data_simulated, 0.05, axis = 1))

//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j

Out[14]:

<matplotlib.axes._subplots.AxesSubplot at 0x114d31550>

According to my intuition, this approach could be ok if we could be certain of the parameters we estimated earlier which is not the case.

A Bayesian Approach¶

In [15]:

with pm.Model() as model2:
    mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
    sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
    df = pm.Uniform('df', 0., 10.)

    returns = pm.StudentT('returns', nu=df, mu=mu, sd=sigma,
                       observed=data)
    
    returns_sim = pm.StudentT('returns_sim', nu=df, mu=mu, sd=sigma, shape = 504)
        
    pm.Deterministic('annual volatility',
                     returns.distribution.variance**.5 * np.sqrt(252))

    pm.Deterministic('sharpe', returns.distribution.mean /
                     returns.distribution.variance**.5 *
                     np.sqrt(252))
 
    step = pm.NUTS()
    trace = pm.sample(2000, step)

100%|██████████| 2500/2500 [01:53<00:00, 24.38it/s]

In [16]:

pm.traceplot(trace);

In [17]:

len(trace['returns_sim'][0])

Out[17]:

In [18]:

percentiles = np.percentile(trace['returns_sim'], 0.05, axis = 1)

In [19]:

trace['returns_sim']

Out[19]:

array([[-0.24997734,  0.37113237, -0.44993306, ..., -0.16581804,
        -0.0034413 ,  0.62730626],
       [-0.03069486, -0.32997648,  0.72632171, ...,  0.19922195,
         0.02399238, -0.42302246],
       [-0.09759646,  0.39702952, -0.55208884, ..., -0.60195092,
         0.03460164,  0.55997644],
       ..., 
       [ 0.00415516,  0.6320403 , -0.14154658, ...,  0.33196563,
        -0.01578671,  0.47064749],
       [-0.05540536,  0.07131875, -0.21332403, ..., -0.49154211,
         0.02466654, -0.15787506],
       [-0.88260183, -0.59977196, -0.88628565, ..., -0.24760705,
         0.13141342, -0.74078556]])

In [20]:

len(percentiles)

Out[20]:

In [21]:

percentiles

Out[21]:

array([-1.60787008, -1.3173338 , -1.90733657, ..., -1.42773211,
       -1.35546003, -1.25542944])

In [22]:

sns.distplot(percentiles)

//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j

Out[22]:

<matplotlib.axes._subplots.AxesSubplot at 0x121ef1240>

The Bayesian approach gives a weak probability (but not null) that our VaR is below -3. Earlier it was not the case :

In [25]:

sns.distplot(np.percentile(data_simulated, 0.05, axis = 1))

//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j

Out[25]:

<matplotlib.axes._subplots.AxesSubplot at 0x122207668>

In [23]:

percentiles.mean()

Out[23]:

-1.731044972808309

In [24]:

np.percentile(data, 0.05)

Out[24]:

-1.6768066573720881

In [ ]: