%matplotlib inline
import requests
from datetime import datetime
from matplotlib import pyplot as plt
from IPython.display import display, HTML
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from numba import guvectorize
from scipy import signal
from scipy import stats as ss
# utils functions
# ===============
def get_prices(coin_symbol, timeout=30):
    """Get close prices for one cryptocurrency.

    Queries the CryptoCompare ``histoday`` endpoint for `coin_symbol` quoted
    in USD (request parameters ``limit=2000``, ``aggregate=1``) and keeps only
    the ``close`` field of every returned record.

    Parameters
    ----------
    coin_symbol : str
        Symbol of the cryptocurrency to query. It is also used as the name of
        the single column of the returned frame.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up. Forwarded unchanged
        to ``requests.get``; without it a stalled connection would block
        forever.

    Returns
    -------
    price_close : pandas.DataFrame
        One column named `coin_symbol` holding the ``close`` values, indexed
        by the ``time`` field of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not answer within `timeout`.
    KeyError
        If the JSON payload has no ``'Data'`` entry or the records lack the
        ``'time'`` / ``'close'`` fields.
    """
    endpoint = "https://min-api.cryptocompare.com/data/histoday"
    params = dict(fsym=coin_symbol, tsym="USD", limit=2000, aggregate=1)
    response = requests.get(endpoint, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    records = response.json()['Data']
    frame = pd.DataFrame(records).set_index('time')
    return frame.loc[:, ['close']].rename(columns={'close': coin_symbol})
def get_symbol_close(coin_symbol_list):
    """Get close prices for several cryptocurrencies.

    Downloads the close prices of every symbol with `get_prices`, aligns them
    column-wise on their shared time index and converts that index to
    timestamps.

    Parameters
    ----------
    coin_symbol_list : list
        Symbols to download; each one becomes a column of the result.

    Returns
    -------
    price_close : pandas.DataFrame
        One column per symbol, indexed by a ``DatetimeIndex`` and re-sampled
        with ``asfreq`` to the frequency inferred from that index.
    """
    frames = [get_prices(coin_sym) for coin_sym in coin_symbol_list]
    out = pd.concat(frames, axis=1)
    # The index holds Unix epoch seconds; convert it to naive UTC timestamps.
    # pd.to_datetime replaces datetime.utcfromtimestamp, which is deprecated
    # since Python 3.12, and converts the whole index in one vectorised call.
    out.index = pd.to_datetime(out.index, unit='s')
    return out.asfreq(out.index.inferred_freq)
def multi_column_df_display(list_dfs, cols=3):
    """Display several DataFrames side by side in a borderless HTML grid.

    Every frame is rendered with ``DataFrame.to_html`` into its own table
    cell; cells are laid out row by row with `cols` cells per row and the
    result is shown through IPython's ``display``.

    Parameters
    ----------
    list_dfs : iterable of pandas.DataFrame
        Frames to show, in display order.
    cols : int, optional
        Number of cells per row. Each cell gets ``100 / cols`` percent of the
        table width.

    Returns
    -------
    None
        The HTML is displayed, nothing is returned.
    """
    html_table = "<table style='width:100%; border:0px'>{content}</table>"
    html_row = "<tr style='border:0px'>{content}</tr>"
    html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
    # First pass fixes the width and leaves a `{content}` placeholder behind.
    html_cell = html_cell.format(width=100/cols)
    cells = [html_cell.format(content=df.to_html()) for df in list_dfs]
    # Pad only an incomplete last row. `-len % cols` is 0 when the row is
    # already full, so no spurious all-empty row is appended.
    cells += (-len(cells) % cols) * [html_cell.format(content="")]
    rows = [html_row.format(content="".join(cells[i:i+cols]))
            for i in range(0, len(cells), cols)]
    display(HTML(html_table.format(content="".join(rows))))
# hurst functions
# ===============
@guvectorize("float64[:], int64, int64, int64, float64[:]", "(m),(),(),()->()",
             cache=True, nopython=True)
def hurst_rs(x, min_chunksize, max_chunksize, num_chunksize, out):
    """Estimate the Hurst exponent using the R/S method.

    The series is split into non-overlapping chunks of equal size and the
    rescaled range (range of the cumulative mean-adjusted sum divided by the
    standard deviation) is computed for every chunk and averaged. This is
    repeated for `num_chunksize` chunk sizes and the Hurst exponent is the
    slope of the least-squares line of ``log(R/S)`` against
    ``log(chunk_size)``.

    The chunk sizes are ``np.linspace(min_chunksize, max_chunksize + 1,
    num_chunksize)`` truncated to ``int64``.

    Parameters
    ----------
    x : 1D-array
        A time series to calculate the Hurst exponent for. The docstring
        contract is that it has more elements than `min_chunksize` and
        `max_chunksize`.
    min_chunksize : int
        Smallest chunk (window) size.
    max_chunksize : int
        Largest chunk (window) size; it is incremented by one before the grid
        of chunk sizes is built.
    num_chunksize : int
        Number of chunk sizes evaluated between the minimum and the maximum.
        More sizes means more calculations.
    out : 1-element-array, optional
        One element array that receives the result (output argument of the
        ``guvectorize`` layout ``(m),(),(),()->()``).

    Returns
    -------
    H : float
        An estimation of the Hurst exponent, stored in ``out[0]``.

    Notes
    -----
    NOTE(review): `idx` is read after the inner loop finishes. If a chunk
    size is larger than ``len(x)`` the inner loop does not run, so `idx` is
    stale or undefined for that chunk size -- confirm callers always pass a
    series longer than ``max_chunksize + 1``.

    References
    ----------
    Hurst, H. E. (1951). Long term storage capacity of reservoirs. ASCE
    Transactions, 116(776), 770-808.
    Alessio, E., Carbone, A., Castelli, G. et al. Eur. Phys. J. B (2002) 27:
    197. http://dx.doi.org/10.1140/epjb/e20020150
    """
    N = len(x)
    max_chunksize += 1
    # Scratch buffer for the per-chunk R/S values; N slots is an upper bound
    # on the number of chunks for any chunk size >= 1.
    rs_tmp = np.empty(N, dtype=np.float64)
    chunk_size_list = np.linspace(min_chunksize, max_chunksize, num_chunksize)\
        .astype(np.int64)
    rs_values_list = np.empty(num_chunksize, dtype=np.float64)
    # 1. The series is divided into chunks of each size in chunk_size_list
    for i in range(num_chunksize):
        chunk_size = chunk_size_list[i]
        # 2. Iterate over the index of each chunk; trailing samples that do
        #    not fill a whole chunk are ignored.
        number_of_chunks = int(len(x) / chunk_size)
        for idx in range(number_of_chunks):
            # Chunks do not overlap: chunk `idx` covers [ini, end).
            ini = idx * chunk_size
            end = ini + chunk_size
            chunk = x[ini:end]
            # 2.1 Calculate the R/S value of this chunk
            z = np.cumsum(chunk - np.mean(chunk))
            rs_tmp[idx] = np.divide(
                np.max(z) - np.min(z),  # range
                np.nanstd(chunk)  # standard deviation
            )
        # 3. Average R/S over the chunks of this size, ignoring NaN entries
        rs_values_list[i] = np.nanmean(rs_tmp[:idx + 1])
    # 4. Calculate the Hurst exponent: slope of log(R/S) vs log(chunk size);
    #    the column of ones fits the intercept `c`.
    H, c = np.linalg.lstsq(
        a=np.vstack((np.log(chunk_size_list), np.ones(num_chunksize))).T,
        b=np.log(rs_values_list)
    )[0]
    out[0] = H
def hurst_dma(prices, min_chunksize=8, max_chunksize=200, num_chunksize=5):
    """Estimate the Hurst exponent using the DMA method.

    The DMA (detrending moving average) method subtracts a trailing moving
    average of size ``n`` from the series and takes the root mean square of
    that difference. This is repeated for several window sizes ``n`` and the
    Hurst exponent is the slope of the least-squares line of
    ``log10(dma(n))`` against ``log10(n)``.

    The window sizes are ``np.arange(min_chunksize, max_chunksize + 1,
    num_chunksize)``, i.e. in this function `num_chunksize` is the *step*
    between consecutive window sizes.

    Parameters
    ----------
    prices : array-like
        Time series to analyse. It must hold more than ``max_chunksize + 1``
        samples, because the first ``max_chunksize + 1`` filtered values are
        discarded for every window size.
    min_chunksize : int, optional
        Smallest moving-average window.
    max_chunksize : int, optional
        Largest moving-average window (inclusive bound of the range).
    num_chunksize : int, optional
        Step between consecutive window sizes.

    Returns
    -------
    hurst_exponent : float
        Estimation of the Hurst exponent.

    Raises
    ------
    ValueError
        If `prices` has ``max_chunksize + 1`` samples or fewer, so that
        nothing would be left after discarding the warm-up samples.

    References
    ----------
    Alessio, E., Carbone, A., Castelli, G. et al. Eur. Phys. J. B (2002) 27:
    197. http://dx.doi.org/10.1140/epjb/e20020150
    """
    max_chunksize += 1
    N = len(prices)
    if N <= max_chunksize:
        # Previously this ended in a ZeroDivisionError or in log10(0) feeding
        # the regression; report the real cause instead.
        raise ValueError(
            "prices must contain more than max_chunksize + 1 samples"
        )
    n_list = np.arange(min_chunksize, max_chunksize, num_chunksize,
                       dtype=np.int64)
    dma_list = np.empty(len(n_list))
    factor = 1 / (N - max_chunksize)
    # sweeping n_list
    for i, n in enumerate(n_list):
        # FIR taps [(n-1)/n, -1/n, ..., -1/n]: filtering with them yields
        # y[t] - mean(y[t-n+1 .. t]), the series minus its moving average.
        b = np.full(n, -1.0 / n)
        b[0] = (n - 1) / n
        # Drop the first samples so every window size uses the same span and
        # no filter warm-up values are included.
        noise = np.power(signal.lfilter(b, 1, prices)[max_chunksize:], 2)
        dma_list[i] = np.sqrt(factor * np.sum(noise))
    # Slope of the log-log line; the column of ones fits the intercept.
    # rcond=None selects NumPy's current default and avoids the FutureWarning
    # emitted by NumPy < 2.0 when rcond is omitted.
    H, _ = np.linalg.lstsq(
        np.vstack([np.log10(n_list), np.ones(len(n_list))]).T,
        np.log10(dma_list),
        rcond=None,
    )[0]
    return H
def hurst_dsod(x):
    """Estimate the Hurst exponent of a time series (DSOD method).

    The estimation is based on the discrete second order derivative: the
    series is filtered with a narrow and with a wide second-difference
    kernel, the mean square of each filtered signal is computed, and the
    exponent is half the base-2 logarithm of the ratio between both.

    source: https://gist.github.com/wmvanvliet/d883c3fe1402c7ced6fc

    Parameters
    ----------
    x : numpy array
        Time series to estimate the Hurst exponent for. Every operation runs
        along axis 0.

    Returns
    -------
    h : float
        The estimation of the Hurst exponent for the given time series.

    References
    ----------
    Istas, J.; G. Lang (1994), “Quadratic variations and estimation of the local
    Hölder index of a Gaussian process,” Ann. Inst. Poincaré, 33, pp. 407–436.

    Notes
    -----
    This function is a literal translation of wfbmesti.m from the MATLAB
    wavelet toolbox.
    """
    increments = np.diff(x, axis=0)
    path = np.cumsum(increments, axis=0)

    def mean_square_response(taps):
        # Filter along time and discard the first len(taps) - 1 outputs,
        # which contain filter start-up artifacts.
        response = signal.lfilter(taps, 1, path, axis=0)
        settled = response[len(taps) - 1:]
        return np.mean(settled ** 2, axis=0)

    # second order derivative
    narrow_power = mean_square_response([1, -2, 1])
    # wider second order derivative
    wide_power = mean_square_response([1, 0, -2, 0, 1])
    return 0.5 * np.log2(wide_power / narrow_power)
def hurst_exponent(prices, min_chunksize=8, max_chunksize=200, num_chunksize=5,
                   method='RS'):
    """Estimate the Hurst exponent.

    Estimates the Hurst exponent following one of 3 methods. Each method is
    implemented by its own function (`hurst_rs`, `hurst_dma`, `hurst_dsod`);
    this function converts the input to an array and dispatches to it.

    Parameters
    ----------
    prices : numpy.ndarray, pandas.Series, pandas.DataFrame or sequence
        A time series to estimate the Hurst exponent for. Anything accepted
        by ``numpy.asarray`` that supports ``len`` works.
    min_chunksize : int, optional
        Minimum chunk size of the original series. This parameter doesn't have
        any effect with DSOD method.
    max_chunksize : int, optional
        Maximum chunk size of the original series. This parameter doesn't have
        any effect with DSOD method.
    num_chunksize : int, optional
        Controls which chunk sizes are used between the minimum and the
        maximum: `hurst_rs` treats it as the number of chunk sizes, while
        `hurst_dma` uses it as the step between chunk sizes. This parameter
        doesn't have any effect with DSOD method.
    method : {'RS', 'DMA', 'DSOD', 'all'}
        The method can take one of these values,
            RS : rescaled range.
            DMA : deviation moving average.
            DSOD : discrete second order derivative.
            all : run the three methods above.

    Returns
    -------
    hurst_exponent : float or list
        Estimation of the Hurst exponent according to the method selected;
        ``numpy.nan`` when `prices` is empty. With ``method='all'`` a list
        with the RS, DMA and DSOD estimations, in that order.

    Raises
    ------
    NotImplementedError
        If `method` is not one of the supported values.

    References
    ----------
    RS : Hurst, H. E. (1951). Long term storage capacity of reservoirs. ASCE
        Transactions, 116(776), 770-808.
    DMA : Alessio, E., Carbone, A., Castelli, G. et al. Eur. Phys. J. B (2002)
        27: 197. http://dx.doi.org/10.1140/epjb/e20020150
    DSOD : Istas, J.; G. Lang (1994), “Quadratic variations and estimation of
        the local Hölder index of a Gaussian process,” Ann. Inst. Poincaré,
        33, pp. 407–436.

    Notes
    -----
    The Hurst exponent is an estimation, which is important because there is
    no closed equation for it; instead we have some methods to estimate it,
    with high variations among them.

    See Also
    --------
    hurst_rs, hurst_dma, hurst_dsod
    """
    if len(prices) == 0:
        return np.nan
    # np.asarray also accepts plain sequences, unlike calling __array__().
    arr = np.asarray(prices)
    if method == 'RS':
        # R/S works on the increments. The transpose moves each series onto
        # the last axis for multi-column input and is a no-op for 1-D input.
        return hurst_rs(np.diff(arr, axis=0).T, min_chunksize, max_chunksize,
                        num_chunksize)
    if method == 'DMA':
        return hurst_dma(arr, min_chunksize, max_chunksize, num_chunksize)
    if method == 'DSOD':
        return hurst_dsod(arr)
    if method == 'all':
        return [
            hurst_exponent(arr, min_chunksize, max_chunksize, num_chunksize,
                           single_method)
            for single_method in ('RS', 'DMA', 'DSOD')
        ]
    raise NotImplementedError('The method choose is not implemented.')