In [1]:
%load_ext autoreload
%autoreload 2

%load_ext watermark

from pathlib import Path
import time 
from pprint import pprint, pformat

import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
import numba as nb
from arch.bootstrap import IIDBootstrap, MovingBlockBootstrap, CircularBlockBootstrap

# import visual tools
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
import seaborn as sns

# import util libs
from tqdm import tqdm, tqdm_notebook
import warnings
warnings.filterwarnings("ignore")

#import sys
#sys.path.append("..")
#from src.tools.pystore_tools import *
#from src.tools.utils import *

%watermark -v -m -g
print()
%watermark --iversions
CPython 3.8.5
IPython 7.19.0

compiler   : GCC 7.3.0
system     : Linux
release    : 5.8.0-36-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 12
interpreter: 64bit
Git hash   :

matplotlib      3.3.2
statsmodels.api 0.12.0
numpy           1.19.2
seaborn         0.11.0
pandas          1.1.3
numba           0.51.2

In [2]:
sns_params = {
    'font.size':9.5,
    'font.weight':'medium',
    'figure.figsize':(10,7),
}

mpl.rcParams["figure.dpi"] = 300
mpl.rcParams["savefig.dpi"] = 100
plt.style.use('seaborn-talk')
#plt.style.use('bmh')
#plt.style.use('dark_background')
sns.set_context(sns_params)
savefig_kwds=dict(dpi=300, bbox_inches='tight', frameon=True, format='png')
nanex_colors = ("#f92b20", "#fe701b", "#facd1f", "#d6fd1c", "#65fe1b",
                "#1bfe42", "#1cfdb4", "#1fb9fa", "#1e71fb", "#261cfd")
nanex_cmap = mpl.colors.ListedColormap(nanex_colors,name='nanex_cmap')
plt.register_cmap('nanex_cmap', cmap=nanex_cmap)
In [3]:
def plot_autocorr(s, lags=50, figsize=(10,7), title=None): 
    fig = plt.figure(figsize=figsize)
    layout = 2, 2
    acf_ax = plt.subplot2grid(layout, (0, 0))
    abs_acf_ax = plt.subplot2grid(layout, (0, 1))
    pacf_ax = plt.subplot2grid(layout, (1, 0))
    squared_ax = plt.subplot2grid(layout, (1, 1))
    
    sm.graphics.tsa.plot_acf(s, fft=True, zero=False, lags=lags, ax=acf_ax,
                             title='Autocorrelation of Returns');
    
    sm.graphics.tsa.plot_acf(s.abs(), fft=True, zero=False,
                             lags=lags, ax=abs_acf_ax,
                             title='Autocorrelation of Absolute Returns');
    
    sm.graphics.tsa.plot_pacf(s, zero=False, lags=lags, ax=pacf_ax,
                              title='Partial Autocorrelation of Returns');
    
    sm.graphics.tsa.plot_acf(s**2, fft=True, zero=False,
                             lags=lags, ax=squared_ax,
                             title='Autocorrelation of Squared Returns');
    
    if title: fig.suptitle(title, fontweight='demi', fontsize=16)
    fig.tight_layout()
    fig.subplots_adjust(top=0.88)
    return

def plot_mean_dist(returns, sim_returns, ax):
    
    mean_return = returns.mean()
    
    ax.set_title(f'{returns.name} mean return: {mean_return:.4f}')
    sim_means = sim_returns.mean().squeeze()

    g = sns.distplot(sim_means, kde=False, ax=ax)
    g.axvline(mean_return, color='r')    
    return
    
def plot_std_dist(returns, sim_returns, ax):
    
    std = returns.std()
    
    ax.set_title(f'{returns.name} return std: {std:.4f}')
    sim_stds = sim_returns.std().squeeze()

    g = sns.distplot(sim_stds, kde=False, ax=ax)
    g.axvline(std, color='r')    
    return

def plot_min_dist(returns, sim_returns, ax):
    
    min_ = returns.min()
    
    ax.set_title(f'{returns.name} return min: {min_:.4f}')
    sim_mins = sim_returns.min().squeeze()

    g = sns.distplot(sim_mins, kde=False, ax=ax)
    g.axvline(min_, color='r')    
    return

def plot_max_dist(returns, sim_returns, ax):
    
    max_ = returns.max()
    
    ax.set_title(f'{returns.name} return max: {max_:.4f}')
    sim_maxs = sim_returns.max().squeeze()

    g = sns.distplot(sim_maxs, kde=False, ax=ax)
    g.axvline(max_, color='r')    
    return

def plot_autocorr_dist(returns, sim_returns, ax):
    
    autocorr = returns.autocorr()

    ax.set_title(f'{returns.name} return autocorr: {autocorr:.4f}')
    sim_autocorrs = sim_returns.apply(pd.Series.autocorr).squeeze()

    g = sns.distplot(sim_autocorrs, kde=False, ax=ax)
    g.axvline(autocorr, color='r') 
    return

def plot_stat_dist(returns, sim_returns, figsize=(10,7)):
    fig = plt.figure(figsize=figsize, constrained_layout=True)
    gs = fig.add_gridspec(3, 2)
    plot_mean_dist(returns, sim_returns, fig.add_subplot(gs[0,0]))
    plot_std_dist(returns, sim_returns, fig.add_subplot(gs[0,1]))
    plot_min_dist(returns, sim_returns, fig.add_subplot(gs[1,0])) 
    plot_max_dist(returns, sim_returns, fig.add_subplot(gs[1,1])) 
    plot_autocorr_dist(returns, sim_returns, fig.add_subplot(gs[2,:]))
    
    fig.suptitle(f'{returns.name} simulated stat distributions',
                 fontweight='demi', fontsize=16)
    fig.tight_layout()
    fig.subplots_adjust(top=0.88)
    return
In [8]:
def to_returns(s): return np.log(s/s.shift(1)).dropna()

def to_price_index(df, start=100):
    return (start * (np.exp(df.cumsum())))

def cdescribe(x, n_cols=None):
    if not n_cols:
        d = x.describe()
        d.loc['skew'] = x.skew()
        d.loc['kurtosis'] = x.kurtosis()
        return d
    else:
        x_ = x.sample(n_cols, axis=1)
        d = x_.describe()
        d.loc['skew'] = x_.skew()
        d.loc['kurtosis'] = x_.kurtosis()        
        return d
    
def CBB(s, blocksize, N_paths):
    sim_returns = []

    bs = CircularBlockBootstrap(blocksize, s)
    for i, data in enumerate(tqdm(bs.bootstrap(N_paths))):
        tmp = data[0][0].reset_index(drop=True)
        sim_returns.append(tmp)
    simulations = pd.concat(sim_returns, axis=1, ignore_index=True)
    return simulations     

def MBB(s, blocksize, N_paths):
    sim_returns = []

    bs = MovingBlockBootstrap(blocksize, s)
    for i, data in enumerate(tqdm(bs.bootstrap(N_paths))):
        tmp = data[0][0].reset_index(drop=True)
        sim_returns.append(tmp)
    simulations = pd.concat(sim_returns, axis=1, ignore_index=True)
    return simulations  

def IIDB(s, N_paths):
    sim_returns = []

    bs = IIDBootstrap(s)
    for i, data in enumerate(tqdm(bs.bootstrap(N_paths))):
        tmp = data[0][0].reset_index(drop=True)
        sim_returns.append(tmp)
    simulations = pd.concat(sim_returns, axis=1, ignore_index=True)
    return simulations    

def compare_stats(x, y, n_cols=None):
    pd.options.display.float_format = '{:,.4f}'.format
    data = (pd.concat([cdescribe(x), cdescribe(y, n_cols=n_cols)], axis=1))
    return data

def view_all(real, sims, n_cols=20, cmap=None):
    plt.set_cmap(cmap)
    display(compare_stats(real, sims, n_cols=20))
    plot_stat_dist(real, sims)
    plot_autocorr(real, title=f'{real.name} Real Returns')
    rand_col = np.random.randint(len(sims.columns), size=1)[0]
    plot_autocorr(sims[rand_col], 
                  title=f'Simulated Return Path {rand_col}')
    return
    
def plot_realizations(real, sims, start, 
                      n_plot_paths=50, figsize=(10,7), cmap=None):   
    plt.set_cmap(cmap)
    sim_prices = to_price_index(sims, start=start)

    fig, ax = plt.subplots(figsize=figsize)
    (sim_prices.sample(n_plot_paths, axis=1)
     .plot(legend=False, alpha=0.7, lw=1., ax=ax))
    (to_price_index(real.reset_index(drop=True), start=start)
     .plot(legend=True, ax=ax, lw=5, ls='--', color='k'))
    plt.title(f'{real.name} {n_plot_paths} simulated price paths')
    
def cprint(df, nrows=None, sample=False):
    """
    custom print function to view pandas and dask dataframes

    :param df: dataframe
    :param nrows: number of rows to return
    :param sample: bool, return random sample for view
    :return:
    """
    if not isinstance(df, pd.DataFrame):
        try:
            df = df.to_frame()
        except:
            raise ValueError('object cannot be coerced to df')

    if not nrows: nrows = 5
    print('-' * 79)
    print('dataframe information')
    print('-' * 79)
    if sample:
        print(df.sample(nrows))
    else:
        print(df.tail(nrows))
    print('-' * 50)
    print(df.info())
    print('-' * 79)
    print()
In [9]:
nq = pd.read_csv('NQ.csv', index_col=0)
gc = pd.read_csv('GC.csv', index_col=0)
In [10]:
nq_price = nq['C']
gc_price = gc['C']
nq_returns = to_returns(nq_price).dropna()
gc_returns = to_returns(gc_price).dropna()
nq_returns.name = "NQ - NASDAQ 100 Futures"
gc_returns.name = "GC - Gold Futures"
In [11]:
cprint(nq_returns)
cprint(nq_price)
cprint(gc_returns)
cprint(gc_price)
-------------------------------------------------------------------------------
dataframe information
-------------------------------------------------------------------------------
                     NQ - NASDAQ 100 Futures
2019-12-31 13:35:00                -0.000029
2019-12-31 13:40:00                -0.000114
2019-12-31 13:45:00                 0.000257
2019-12-31 13:50:00                 0.000114
2019-12-31 13:55:00                 0.000257
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
Index: 70001 entries, 2019-01-01 15:05:00 to 2019-12-31 13:55:00
Data columns (total 1 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   NQ - NASDAQ 100 Futures  70001 non-null  float64
dtypes: float64(1)
memory usage: 1.1+ MB
None
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
dataframe information
-------------------------------------------------------------------------------
                           C
2019-12-31 13:35:00  8767.50
2019-12-31 13:40:00  8766.50
2019-12-31 13:45:00  8768.75
2019-12-31 13:50:00  8769.75
2019-12-31 13:55:00  8772.00
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
Index: 70002 entries, 2019-01-01 15:00:00 to 2019-12-31 13:55:00
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C       70002 non-null  float64
dtypes: float64(1)
memory usage: 1.1+ MB
None
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
dataframe information
-------------------------------------------------------------------------------
                     GC - Gold Futures
2019-12-31 13:35:00          -0.000066
2019-12-31 13:40:00          -0.000132
2019-12-31 13:45:00          -0.000132
2019-12-31 13:50:00           0.000000
2019-12-31 13:55:00          -0.000066
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
Index: 70805 entries, 2019-01-01 15:05:00 to 2019-12-31 13:55:00
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GC - Gold Futures  70805 non-null  float64
dtypes: float64(1)
memory usage: 1.1+ MB
None
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
dataframe information
-------------------------------------------------------------------------------
                          C
2019-12-31 13:35:00  1520.5
2019-12-31 13:40:00  1520.3
2019-12-31 13:45:00  1520.1
2019-12-31 13:50:00  1520.1
2019-12-31 13:55:00  1520.0
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
Index: 70806 entries, 2019-01-01 15:00:00 to 2019-12-31 13:55:00
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   C       70806 non-null  float64
dtypes: float64(1)
memory usage: 1.1+ MB
None
-------------------------------------------------------------------------------

Bootstrap

In [12]:
N_paths = 1000
block_size = 4000
nq_sim_cbb = CBB(nq_returns, blocksize=block_size, N_paths=N_paths)
1000it [00:01, 627.42it/s]
In [13]:
view_all(nq_returns, nq_sim_cbb, cmap=None)
NQ - NASDAQ 100 Futures 678 576 466 710 791 164 385 181 322 ... 509 588 991 842 822 961 611 833 548 774
count 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 ... 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000 70,001.0000
mean 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
std 0.0006 0.0006 0.0006 0.0006 0.0006 0.0006 0.0006 0.0006 0.0007 0.0006 ... 0.0007 0.0007 0.0006 0.0006 0.0005 0.0006 0.0005 0.0006 0.0007 0.0005
min -0.0172 -0.0172 -0.0125 -0.0172 -0.0172 -0.0172 -0.0172 -0.0172 -0.0172 -0.0172 ... -0.0125 -0.0172 -0.0125 -0.0125 -0.0125 -0.0172 -0.0172 -0.0172 -0.0172 -0.0172
25% -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 ... -0.0002 -0.0003 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002 -0.0002
50% 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
75% 0.0002 0.0002 0.0002 0.0002 0.0003 0.0002 0.0002 0.0002 0.0003 0.0002 ... 0.0002 0.0003 0.0002 0.0002 0.0002 0.0002 0.0002 0.0002 0.0002 0.0002
max 0.0127 0.0127 0.0105 0.0087 0.0127 0.0105 0.0127 0.0127 0.0127 0.0105 ... 0.0127 0.0105 0.0127 0.0127 0.0127 0.0127 0.0105 0.0078 0.0127 0.0080
skew -0.7702 -0.8378 -0.5671 -1.1815 -0.6571 -1.6303 -0.9732 -0.6434 -0.7228 -1.5388 ... -0.4074 -0.6764 -0.3770 0.0601 -0.3331 -0.5882 -0.5510 -0.8477 -0.8155 -1.3702
kurtosis 39.0252 43.4387 29.5670 39.6069 29.8651 50.1736 40.0495 29.8928 34.2628 59.4258 ... 26.3966 28.5588 26.3638 38.0721 32.2738 34.3995 31.6570 30.6412 39.1142 50.8183

10 rows × 21 columns

<Figure size 3000x2100 with 0 Axes>
In [14]:
def plot_prices(real, sims, start, price_index,
                      n_plot_paths=50, figsize=(10,7), cmap=None):   
    plt.set_cmap(cmap)
    sim_prices = to_price_index(sims, start=start)
    sim_prices.index = price_index
    display(sim_prices)
    fig, ax = plt.subplots(figsize=figsize)
    (sim_prices.sample(n_plot_paths, axis=1)
     .plot(legend=False, alpha=0.7, lw=1., ax=ax))
    real_prices = to_price_index(real.reset_index(drop=True), start=start)
    real_prices.index = price_index
    real_prices.plot(legend=True, ax=ax, lw=4, ls='--', color='k')
    plt.title(f'{real.name} {n_plot_paths} simulated price paths')
    plt.xticks(rotation=10)

plot_prices(nq_returns, nq_sim_cbb, start=7500, price_index=nq.index[1:], n_plot_paths=30, cmap='nanex_cmap')
0 1 2 3 4 5 6 7 8 9 ... 990 991 992 993 994 995 996 997 998 999
2019-01-01 15:05:00 7,508.9505 7,498.9306 7,505.0105 7,503.1089 7,500.6869 7,499.1628 7,503.5668 7,497.8826 7,502.5826 7,504.9669 ... 7,498.1276 7,500.0000 7,504.0690 7,503.1682 7,502.1950 7,500.8641 7,500.4874 7,499.7756 7,498.2646 7,500.7093
2019-01-01 15:10:00 7,520.8038 7,504.0103 7,503.5789 7,505.7394 7,500.6869 7,497.7675 7,500.0000 7,498.1472 7,503.0521 7,508.0416 ... 7,498.5957 7,496.4797 7,504.9732 7,499.1359 7,498.5367 7,500.2160 7,502.1935 7,499.7756 7,500.2479 7,499.5271
2019-01-01 15:15:00 7,523.4647 7,500.8021 7,511.2140 7,506.2177 7,500.0000 7,497.4885 7,499.7622 7,500.0000 7,503.5217 7,508.0416 ... 7,502.5745 7,489.1876 7,502.2605 7,506.9124 7,497.3172 7,499.7840 7,501.7060 7,496.8587 7,493.0583 7,498.5813
2019-01-01 15:20:00 7,518.6266 7,499.1979 7,513.8385 7,507.6526 7,499.7710 7,493.0235 7,504.9935 7,500.7940 7,504.2260 7,508.0416 ... 7,502.1064 7,492.4565 7,500.6782 7,496.8318 7,497.3172 7,498.9199 7,502.6809 7,495.7369 7,489.8354 7,500.4729
2019-01-01 15:25:00 7,519.8362 7,495.1877 7,510.7368 7,507.1743 7,499.3131 7,496.3722 7,490.9641 7,501.3234 7,502.5826 7,506.6225 ... 7,503.5107 7,490.9478 7,499.7739 7,496.2558 7,495.8539 7,498.4879 7,502.9246 7,493.7175 7,490.3312 7,497.6356
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-12-31 13:35:00 10,710.2745 11,770.0660 7,079.9821 11,279.2333 8,923.6426 9,824.5216 10,471.1298 11,101.6288 10,116.3555 11,095.7344 ... 9,973.4456 10,456.2258 8,798.7143 9,478.2119 13,517.5005 13,018.8168 6,340.4519 10,735.2984 10,306.6529 8,192.5285
2019-12-31 13:40:00 10,720.5104 11,770.0660 7,080.6871 11,265.3228 8,929.5024 9,826.6013 10,471.7642 11,103.2242 10,116.0178 11,093.5195 ... 9,973.1189 10,462.1409 8,789.9725 9,470.6363 13,563.6815 13,019.6561 6,340.6332 10,738.8761 10,306.9484 8,193.5262
2019-12-31 13:45:00 10,740.6292 11,772.8607 7,080.9221 11,272.0901 8,926.5725 9,825.7100 10,468.2752 11,103.2242 10,116.6933 11,090.5664 ... 9,977.6933 10,447.8750 8,794.7407 9,463.0608 13,520.5123 13,019.2365 6,340.9956 10,737.4450 10,306.9484 8,193.7756
2019-12-31 13:50:00 10,739.9233 11,772.4615 7,075.7519 11,281.1132 8,926.2795 9,822.1449 10,467.6408 11,104.8197 10,123.4483 11,096.1035 ... 9,981.9409 10,450.6586 8,792.6215 9,466.0910 13,487.3824 13,007.9048 6,341.1769 10,736.7295 10,309.0166 8,192.7780
2019-12-31 13:55:00 10,736.7466 11,767.2714 7,079.9821 11,282.2410 8,911.3370 9,823.9274 10,464.7861 11,106.4151 10,123.7861 11,101.6407 ... 9,979.6537 10,452.0504 8,787.5884 9,467.6061 13,465.7977 13,002.0291 6,341.1769 10,729.5740 10,306.9484 8,193.7756

70001 rows × 1000 columns

<Figure size 3000x2100 with 0 Axes>