import gdax
import pandas as pd
from time import sleep
import plotly.plotly as py
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from datetime import datetime
init_notebook_mode(connected=False)
import cufflinks
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import time
from keras_tqdm import TQDMNotebookCallback
import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
import numpy as np
def get_loads(symbol, start=None, end=None, granularity=86400):
    """Yield historic candle rows for `symbol` from the GDAX public API,
    paging backwards in time until the API returns no more data.

    Each yielded row is a list in GDAX's [time, low, high, open, close,
    volume] order, with `time` in epoch seconds.

    Parameters
    ----------
    symbol : str
        Product id, e.g. 'BTC-USD'.
    start, end : datetime-like, optional
        Window for the first request. Defaults to the most recent
        `granularity` seconds ending now.
    granularity : int
        Candle width in seconds (default 86400 = one day).

    Raises
    ------
    ValueError
        If the API responds with something other than a list (e.g. an
        error payload).
    """
    public_client = gdax.PublicClient()
    if end is None:
        end = pd.to_datetime('now')
    if start is None:
        start = end - pd.Timedelta(seconds=granularity)
    while True:
        response = public_client.get_product_historic_rates(
            product_id=symbol,
            granularity=granularity,
            start=start.isoformat(),
            end=end.isoformat()
        )
        if not response:
            # No more history: finish the generator with a plain return.
            # (Raising StopIteration inside a generator is a RuntimeError
            # under PEP 479 / Python 3.7+.)
            return
        if not isinstance(response, list):
            raise ValueError(response)
        for r in response:
            yield r
        sleep(3)  # back off to stay under the public API rate limit
        # Page backwards: the timestamp of the last row returned becomes
        # the new window end (equivalent to the old use of the leaked
        # loop variable `r`, but explicit).
        end = pd.to_datetime(response[-1][0], unit='s')
        start = end - pd.Timedelta(seconds=granularity * len(response))
        print(f"{start}-{end}")
def ts_df_to_supervised(df, lag=1):
    """Frame a series/frame for supervised learning.

    Concatenates the `lag` shifted copies (t-1 .. t-lag) followed by the
    original values (t) column-wise, with the NaNs introduced by shifting
    replaced by 0.
    """
    lagged = [df.shift(offset) for offset in range(1, lag + 1)]
    framed = pd.concat(lagged + [df], axis=1)
    # fillna on a copy rather than in place; the concat result is local.
    return framed.fillna(0)
def difference(X, lag=1):
    """Return the lag-`lag` differenced series, dropping the undefined head rows."""
    delta = X.diff(lag)
    return delta.dropna()
def inverse_difference(history, yhat, interval=1):
    """Undo differencing: add the observation `interval` steps back to `yhat`."""
    base = history[-interval]
    return base + yhat
def twodim_to_threedim(X):
    """Reshape a 2-D (samples, features) frame to (samples, 1, features).

    This is the [samples, timesteps, features] layout expected by LSTM
    layers, with a single timestep per sample.
    """
    samples, features = X.shape[0], X.shape[1]
    return X.values.reshape(samples, 1, features)
def fit_lstm(train, batch_size, nb_epoch, neurons):
    """Fit a stateful single-layer LSTM on a supervised 2-D array.

    Parameters
    ----------
    train : ndarray
        2-D array where every column but the last is a feature and the
        last column is the target.
    batch_size : int
        Fixed batch size (required by the stateful LSTM; predictions must
        use the same batch size).
    nb_epoch : int
        Number of training epochs; states are reset between epochs.
    neurons : int
        Number of LSTM units.

    Returns
    -------
    The trained keras Sequential model.
    """
    X, y = train[:, 0:-1], train[:, -1]
    # [samples, timesteps=1, features] layout for the LSTM input.
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # One-epoch fits in a loop so the stateful layer's internal state can
    # be reset manually between epochs (keras won't do it with shuffle=False).
    for i in tqdm.tqdm_notebook(range(nb_epoch)):
        model.fit(X, y, epochs=1, batch_size=batch_size,
                  verbose=0, shuffle=False)
        model.reset_states()
    return model
def forecast(model, batch_size, row):
    """One-step forecast: feed every element of `row` except the last
    (the target) to `model` and return the scalar prediction."""
    features = row[0:-1]
    shaped = features.reshape(1, 1, len(features))
    prediction = model.predict(shaped, batch_size=batch_size)
    return prediction[0, 0]
def scale(train, test):
    """Scale both sets into [-1, 1] with a scaler fit on `train` only.

    Fitting on the training set alone avoids leaking test-set statistics
    into the transform. Returns (scaler, train_scaled, test_scaled) so the
    scaler can invert predictions later.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train.values)
    train_scaled = scaler.transform(train.values.reshape(train.shape[0], train.shape[1]))
    test_scaled = scaler.transform(test.values.reshape(test.shape[0], test.shape[1]))
    return scaler, train_scaled, test_scaled
# ---- Script body: download data, split, and build supervised sets ----

# Materialise the get_loads generator into a frame; columns follow GDAX's
# candle order. NOTE(review): GDAX presumably returns rows newest-first —
# confirm before relying on ordering (sort_index below handles it anyway).
df = pd.DataFrame(
    get_loads('BTC-USD'),
    columns=['time','low','high','open','close','volume']
)
# Epoch seconds -> pandas timestamps.
df['time'] = pd.to_datetime(df['time'], unit='s')
# Walk-forward cross-validation: 5 expanding/rolling splits, training
# window capped at half the data.
tscv = TimeSeriesSplit(max_train_size = df.shape[0]//2, n_splits=5)
tt_sets = []
for train_index, test_index in tscv.split(df.close.values):
    tt_sets.append((train_index,test_index))
    print(f"TRAIN:, {min(train_index)} ++ {len(train_index)}, TEST: {min(test_index)} ++ {len(test_index)}")
raw_X = df.set_index('time')['close'].sort_index() # Sort Closes
dX = difference(raw_X) # Stationarise
sup_dX = ts_df_to_supervised(dX) # Supervise
# NOTE(review): the split indices were computed on df (length N) but are
# applied to sup_dX, which lost one row to differencing (length N-1) —
# verify the last fold's indices don't run past the end.
sup_dX_train = sup_dX.iloc[tt_sets[0][0]] # Training Set
sup_dX_test = sup_dX.iloc[tt_sets[0][1]] # Test Set