In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import math
import warnings
import numpy as np

# specify to ignore warning messages
# NOTE(review): this blanket filter hides *all* warnings, including the
# sklearn DeprecationWarnings this notebook later triggers — consider
# narrowing it to specific categories.
warnings.filterwarnings("ignore") 

# Project-local helpers (defined in lstm_utils, not shown here):
# data download, LSTM model construction, and scaled train/test splitting.
from lstm_utils import get_raw_data
from lstm_utils import get_seq_model
from lstm_utils import get_seq_train_test
from keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import mean_squared_error
Using Theano backend.
In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn theme: white grid background, "talk"-scale element sizing.
sns.set_style('whitegrid')
sns.set_context('talk')

# Enlarge fonts and default figure size for presentation-quality plots.
plot_params = dict([
    ('legend.fontsize', 'x-large'),
    ('figure.figsize', (15, 5)),
    ('axes.labelsize', 'x-large'),
    ('axes.titlesize', 'x-large'),
    ('xtick.labelsize', 'x-large'),
    ('ytick.labelsize', 'x-large'),
])
plt.rcParams.update(plot_params)

Set Parameters

In [3]:
# Fraction of the time series used for training; the rest is held out.
TRAIN_PERCENT = 0.7
# Ticker symbol for the S&P 500 index (used by get_raw_data below).
STOCK_INDEX = '^GSPC'
# Forwarded to get_seq_model to print the compiled model summary.
VERBOSE=True

Getting Data

In [4]:
# Fetch raw index data and keep only the closing-price series.
# assumes get_raw_data returns a DataFrame with a 'Close' column — see lstm_utils
sp_df = get_raw_data(STOCK_INDEX)
sp_close_series = sp_df.Close 

# Quick visual sanity check of the raw closing prices.
sp_close_series.plot()
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1635619bdd8>

Preprocessing Data

In [5]:
# Split the closing-price series into scaled train/test segments.
train, test, scaler = get_seq_train_test(sp_close_series,
                                         scaling=True,
                                         train_size=TRAIN_PERCENT)

# Shape each segment as a single batch of shape (1, timesteps, 1 feature),
# the layout the sequence LSTM expects.
train = train.reshape(1, -1, 1)
test = test.reshape(1, -1, 1)

# One-step-ahead targets: y is x shifted forward by one timestep.
train_x = train[:, :-1, :]
train_y = train[:, 1:, :]

test_x = test[:, :-1, :]
test_y = test[:, 1:, :]

print("Data Split Complete")

print("train_x shape={}".format(train_x.shape))
print("train_y shape={}".format(train_y.shape))
print("test_x shape={}".format(test_x.shape))
print("test_y shape={}".format(test_y.shape))
Data Split Complete
train_x shape=(1, 1965, 1)
train_y shape=(1, 1965, 1)
test_x shape=(1, 842, 1)
test_y shape=(1, 842, 1)
C:\Anaconda2\envs\python3\lib\site-packages\sklearn\preprocessing\data.py:321: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
C:\Anaconda2\envs\python3\lib\site-packages\sklearn\preprocessing\data.py:356: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)

Prepare LSTM Model

In [6]:
# build RNN model
# The first build attempt can fail (the captured output shows the retry
# path firing under the Theano backend), so we retry once.
seq_lstm_model = None
try:
    seq_lstm_model = get_seq_model(input_shape=(train_x.shape[1], 1),
                                   verbose=VERBOSE)
except Exception as build_err:
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
    # propagate, and the actual failure reason is surfaced instead of
    # being silently swallowed.
    print("Model Build Failed. Trying Again")
    print("Reason: {}".format(build_err))
    seq_lstm_model = get_seq_model(input_shape=(train_x.shape[1], 1),
                                   verbose=VERBOSE)
Model Build Failed. Trying Again
> Compilation Time :  0.008561372756958008
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_2 (LSTM)                (None, 1965, 4)           96        
_________________________________________________________________
time_distributed_1 (TimeDist (None, 1965, 1)           5         
=================================================================
Total params: 101
Trainable params: 101
Non-trainable params: 0
_________________________________________________________________
None

Fit the Model

In [7]:
# train the model
# batch_size=1 with a single (1, timesteps, 1) batch means each epoch is
# one full pass over the whole sequence; verbose=2 prints one loss line
# per epoch (see the captured output below).
seq_lstm_model.fit(train_x, train_y, 
               epochs=150, batch_size=1, 
               verbose=2)
print("Model Fit Complete")
Epoch 1/150
0s - loss: 0.1923
Epoch 2/150
0s - loss: 0.1838
Epoch 3/150
0s - loss: 0.1781
Epoch 4/150
0s - loss: 0.1734
Epoch 5/150
0s - loss: 0.1694
Epoch 6/150
0s - loss: 0.1658
Epoch 7/150
0s - loss: 0.1625
Epoch 8/150
0s - loss: 0.1593
Epoch 9/150
0s - loss: 0.1564
Epoch 10/150
0s - loss: 0.1535
Epoch 11/150
0s - loss: 0.1507
Epoch 12/150
0s - loss: 0.1479
Epoch 13/150
0s - loss: 0.1452
Epoch 14/150
0s - loss: 0.1426
Epoch 15/150
0s - loss: 0.1399
Epoch 16/150
0s - loss: 0.1373
Epoch 17/150
0s - loss: 0.1346
Epoch 18/150
0s - loss: 0.1320
Epoch 19/150
0s - loss: 0.1294
Epoch 20/150
0s - loss: 0.1267
Epoch 21/150
0s - loss: 0.1241
Epoch 22/150
0s - loss: 0.1214
Epoch 23/150
0s - loss: 0.1188
Epoch 24/150
0s - loss: 0.1162
Epoch 25/150
0s - loss: 0.1135
Epoch 26/150
0s - loss: 0.1109
Epoch 27/150
0s - loss: 0.1083
Epoch 28/150
0s - loss: 0.1056
Epoch 29/150
0s - loss: 0.1030
Epoch 30/150
0s - loss: 0.1004
Epoch 31/150
0s - loss: 0.0978
Epoch 32/150
0s - loss: 0.0952
Epoch 33/150
0s - loss: 0.0926
Epoch 34/150
0s - loss: 0.0900
Epoch 35/150
0s - loss: 0.0875
Epoch 36/150
0s - loss: 0.0849
Epoch 37/150
0s - loss: 0.0824
Epoch 38/150
0s - loss: 0.0798
Epoch 39/150
0s - loss: 0.0774
Epoch 40/150
0s - loss: 0.0749
Epoch 41/150
0s - loss: 0.0724
Epoch 42/150
0s - loss: 0.0700
Epoch 43/150
0s - loss: 0.0676
Epoch 44/150
0s - loss: 0.0653
Epoch 45/150
0s - loss: 0.0630
Epoch 46/150
0s - loss: 0.0607
Epoch 47/150
0s - loss: 0.0584
Epoch 48/150
0s - loss: 0.0562
Epoch 49/150
0s - loss: 0.0541
Epoch 50/150
0s - loss: 0.0520
Epoch 51/150
0s - loss: 0.0500
Epoch 52/150
0s - loss: 0.0480
Epoch 53/150
0s - loss: 0.0460
Epoch 54/150
0s - loss: 0.0442
Epoch 55/150
0s - loss: 0.0424
Epoch 56/150
0s - loss: 0.0406
Epoch 57/150
0s - loss: 0.0390
Epoch 58/150
0s - loss: 0.0374
Epoch 59/150
0s - loss: 0.0359
Epoch 60/150
0s - loss: 0.0344
Epoch 61/150
0s - loss: 0.0331
Epoch 62/150
0s - loss: 0.0318
Epoch 63/150
0s - loss: 0.0305
Epoch 64/150
0s - loss: 0.0294
Epoch 65/150
0s - loss: 0.0283
Epoch 66/150
0s - loss: 0.0273
Epoch 67/150
0s - loss: 0.0264
Epoch 68/150
0s - loss: 0.0255
Epoch 69/150
0s - loss: 0.0247
Epoch 70/150
0s - loss: 0.0240
Epoch 71/150
0s - loss: 0.0233
Epoch 72/150
0s - loss: 0.0227
Epoch 73/150
0s - loss: 0.0221
Epoch 74/150
0s - loss: 0.0215
Epoch 75/150
0s - loss: 0.0210
Epoch 76/150
0s - loss: 0.0205
Epoch 77/150
0s - loss: 0.0201
Epoch 78/150
0s - loss: 0.0196
Epoch 79/150
0s - loss: 0.0192
Epoch 80/150
0s - loss: 0.0188
Epoch 81/150
0s - loss: 0.0184
Epoch 82/150
0s - loss: 0.0180
Epoch 83/150
0s - loss: 0.0177
Epoch 84/150
0s - loss: 0.0173
Epoch 85/150
0s - loss: 0.0169
Epoch 86/150
0s - loss: 0.0165
Epoch 87/150
0s - loss: 0.0161
Epoch 88/150
0s - loss: 0.0157
Epoch 89/150
0s - loss: 0.0153
Epoch 90/150
0s - loss: 0.0149
Epoch 91/150
0s - loss: 0.0145
Epoch 92/150
0s - loss: 0.0141
Epoch 93/150
0s - loss: 0.0137
Epoch 94/150
0s - loss: 0.0133
Epoch 95/150
0s - loss: 0.0129
Epoch 96/150
0s - loss: 0.0124
Epoch 97/150
0s - loss: 0.0120
Epoch 98/150
0s - loss: 0.0116
Epoch 99/150
0s - loss: 0.0111
Epoch 100/150
0s - loss: 0.0107
Epoch 101/150
0s - loss: 0.0103
Epoch 102/150
0s - loss: 0.0098
Epoch 103/150
0s - loss: 0.0094
Epoch 104/150
0s - loss: 0.0090
Epoch 105/150
0s - loss: 0.0086
Epoch 106/150
0s - loss: 0.0082
Epoch 107/150
0s - loss: 0.0078
Epoch 108/150
0s - loss: 0.0073
Epoch 109/150
0s - loss: 0.0070
Epoch 110/150
0s - loss: 0.0066
Epoch 111/150
0s - loss: 0.0062
Epoch 112/150
0s - loss: 0.0058
Epoch 113/150
0s - loss: 0.0055
Epoch 114/150
0s - loss: 0.0051
Epoch 115/150
0s - loss: 0.0048
Epoch 116/150
0s - loss: 0.0045
Epoch 117/150
0s - loss: 0.0042
Epoch 118/150
0s - loss: 0.0039
Epoch 119/150
0s - loss: 0.0036
Epoch 120/150
0s - loss: 0.0033
Epoch 121/150
0s - loss: 0.0031
Epoch 122/150
0s - loss: 0.0028
Epoch 123/150
0s - loss: 0.0026
Epoch 124/150
0s - loss: 0.0024
Epoch 125/150
0s - loss: 0.0022
Epoch 126/150
0s - loss: 0.0020
Epoch 127/150
0s - loss: 0.0018
Epoch 128/150
0s - loss: 0.0017
Epoch 129/150
0s - loss: 0.0015
Epoch 130/150
0s - loss: 0.0014
Epoch 131/150
0s - loss: 0.0013
Epoch 132/150
0s - loss: 0.0012
Epoch 133/150
0s - loss: 0.0011
Epoch 134/150
0s - loss: 0.0010
Epoch 135/150
0s - loss: 9.3632e-04
Epoch 136/150
0s - loss: 8.7300e-04
Epoch 137/150
0s - loss: 8.1800e-04
Epoch 138/150
0s - loss: 7.7047e-04
Epoch 139/150
0s - loss: 7.2956e-04
Epoch 140/150
0s - loss: 6.9441e-04
Epoch 141/150
0s - loss: 6.6422e-04
Epoch 142/150
0s - loss: 6.3824e-04
Epoch 143/150
0s - loss: 6.1577e-04
Epoch 144/150
0s - loss: 5.9622e-04
Epoch 145/150
0s - loss: 5.7904e-04
Epoch 146/150
0s - loss: 5.6379e-04
Epoch 147/150
0s - loss: 5.5009e-04
Epoch 148/150
0s - loss: 5.3764e-04
Epoch 149/150
0s - loss: 5.2620e-04
Epoch 150/150
0s - loss: 5.1558e-04
Model Fit Complete

Train Prediction Performance

In [8]:
# train fit performance
# RMSE computed on the *scaled* series (scaling=True at split time; the
# inverse transform happens later) — hence the small absolute value.
trainPredict = seq_lstm_model.predict(train_x)
trainScore = math.sqrt(mean_squared_error(train_y[0], trainPredict[0]))
print('Train Score: %.2f RMSE' % (trainScore))
Train Score: 0.02 RMSE

Test Prediction Performance

In [9]:
# Pad input sequence
# The model was built for train-length sequences (train_x.shape[1]
# timesteps), so the shorter test sequence is zero-padded at the end.
# NOTE(review): despite its name, `testPredict` here holds the PADDED
# INPUT; it is overwritten with actual predictions in the next cell.
testPredict = pad_sequences(test_x,
                                maxlen=train_x.shape[1],
                                padding='post',
                                dtype='float64')
In [10]:
# forecast values
# Predict over the padded test input (overwrites the padded array).
testPredict = seq_lstm_model.predict(testPredict)

# evaluate performances
# Only the first test_x.shape[1] outputs correspond to real test data;
# the remainder stems from zero padding and is dropped before scoring.
testScore = math.sqrt(mean_squared_error(test_y[0], 
                                         testPredict[0][:test_x.shape[1]]))
print('Test Score: %.2f RMSE' % (testScore))
Test Score: 0.07 RMSE

Plot Test Predictions

In [11]:
# inverse transformation
# sklearn scalers expect 2-D (n_samples, n_features) input; the original
# 1-D reshape triggered the DeprecationWarnings captured below and raises
# a ValueError in sklearn >= 0.19. reshape(-1, 1) makes the call valid,
# and ravel() restores the 1-D shape the plotting cell expects.
trainPredict = scaler.inverse_transform(
    trainPredict.reshape(-1, 1)).ravel()
testPredict = scaler.inverse_transform(
    testPredict.reshape(-1, 1)).ravel()
C:\Anaconda2\envs\python3\lib\site-packages\sklearn\preprocessing\data.py:374: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
C:\Anaconda2\envs\python3\lib\site-packages\sklearn\preprocessing\data.py:374: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
In [12]:
# plot the true and forecasted values
# Targets were shifted by one step, so trainPredict covers timesteps
# 1..len(trainPredict) and is plotted against index[1:train_size].
train_size = len(trainPredict)+1

plt.plot(sp_close_series.index,
         sp_close_series.values,c='black',
         alpha=0.3,label='True Data')
plt.plot(sp_close_series.index[1:train_size],
         trainPredict,label='Training Fit',c='g')
# NOTE(review): index[train_size+1:] places the test forecast two steps
# after the last training-fit point — confirm this alignment against
# get_seq_train_test's split convention (possible off-by-one).
plt.plot(sp_close_series.index[train_size+1:],
         testPredict[:test_x.shape[1]],label='Testing Forecast')
plt.title('Forecast Plot')
plt.legend()
plt.show()