The complete article, with a detailed explanation, is on the blog: http://www.aprendemachinelearning.com/
We import the libraries we are going to use
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('fast')

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, Model
from keras.layers import Input, Embedding, Dense, Activation, Flatten, Dropout, concatenate, LSTM
from keras.layers import BatchNormalization, SpatialDropout1D
from keras.callbacks import Callback
from keras.optimizers import Adam
df = pd.read_csv('time_series.csv', parse_dates=[0], header=None,index_col=0, names=['fecha','unidades'])
df.head()
fecha | unidades |
---|---|
2017-01-02 | 236 |
2017-01-03 | 237 |
2017-01-04 | 290 |
2017-01-05 | 221 |
2017-01-07 | 128 |
df['weekday']=[x.weekday() for x in df.index]
df['month']=[x.month for x in df.index]
df.head()
fecha | unidades | weekday | month |
---|---|---|---|
2017-01-02 | 236 | 0 | 1 |
2017-01-03 | 237 | 1 | 1 |
2017-01-04 | 290 | 2 | 1 |
2017-01-05 | 221 | 3 | 1 |
2017-01-07 | 128 | 5 | 1 |
df.describe()
| | unidades | weekday | month |
---|---|---|---|
count | 604.000000 | 604.000000 | 604.000000 |
mean | 215.935430 | 2.644040 | 6.304636 |
std | 75.050304 | 1.818674 | 3.312359 |
min | 51.000000 | 0.000000 | 1.000000 |
25% | 171.000000 | 1.000000 | 3.000000 |
50% | 214.000000 | 3.000000 | 6.000000 |
75% | 261.250000 | 4.000000 | 9.000000 |
max | 591.000000 | 6.000000 | 12.000000 |
PASOS=7
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
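To see what this windowing does before applying it to the real data, here is a minimal, self-contained sketch (not part of the original notebook) on a toy series, using the same shift-and-concatenate idea with a window of 2 steps instead of PASOS=7:

# Toy illustration only: build a 2-step supervised window by hand.
import pandas as pd
serie = pd.DataFrame({'v': [10, 20, 30, 40, 50]})
ventana = pd.concat([serie.shift(2), serie.shift(1), serie], axis=1)
ventana.columns = ['v(t-2)', 'v(t-1)', 'v(t)']
print(ventana.dropna())
# Each remaining row pairs two past values with the value to predict:
#    v(t-2)  v(t-1)  v(t)
# 2    10.0    20.0    30
# 3    20.0    30.0    40
# 4    30.0    40.0    50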
# load dataset
values = df['unidades'].values
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(-1, 1))
values=values.reshape(-1, 1) # reshape to a single column because we have only one feature
scaled = scaler.fit_transform(values)
reframed = series_to_supervised(scaled, PASOS, 1)
reframed.reset_index(inplace=True, drop=True)
contador=0
reframed['weekday']=df['weekday']  # create the columns; values are filled in the loop below
reframed['month']=df['month']
for i in range(reframed.index[0], reframed.index[-1]):
    # align the weekday/month of the day being predicted with each supervised row
    reframed.loc[contador, 'weekday'] = df['weekday'].iloc[i+8]
    reframed.loc[contador, 'month'] = df['month'].iloc[i+8]
    contador = contador + 1
reframed.head()
| | var1(t-7) | var1(t-6) | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | var1(t) | weekday | month |
---|---|---|---|---|---|---|---|---|---|---|
0 | -0.314815 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | 3.0 | 1.0 |
1 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | 4.0 | 1.0 |
2 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | 5.0 | 1.0 |
3 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | 0.0 | 1.0 |
4 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 | 1.0 | 1.0 |
reordenado = reframed[ ['weekday','month','var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)','var1(t)'] ].copy()
reordenado.dropna(inplace=True)
reordenado
| | weekday | month | var1(t-7) | var1(t-6) | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | var1(t) |
---|---|---|---|---|---|---|---|---|---|---|
0 | 3.0 | 1.0 | -0.314815 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 |
1 | 4.0 | 1.0 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 |
2 | 5.0 | 1.0 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 |
3 | 0.0 | 1.0 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 |
4 | 1.0 | 1.0 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 |
5 | 2.0 | 1.0 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 | -0.303704 |
6 | 3.0 | 1.0 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 | -0.303704 | -0.474074 |
7 | 4.0 | 1.0 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 | -0.303704 | -0.474074 | -0.459259 |
8 | 5.0 | 1.0 | -0.607407 | -0.522222 | -0.644444 | -0.344444 | -0.303704 | -0.474074 | -0.459259 | -0.488889 |
9 | 6.0 | 1.0 | -0.522222 | -0.644444 | -0.344444 | -0.303704 | -0.474074 | -0.459259 | -0.488889 | -0.622222 |
10 | 0.0 | 1.0 | -0.644444 | -0.344444 | -0.303704 | -0.474074 | -0.459259 | -0.488889 | -0.622222 | -0.974074 |
11 | 1.0 | 1.0 | -0.344444 | -0.303704 | -0.474074 | -0.459259 | -0.488889 | -0.622222 | -0.974074 | -0.474074 |
12 | 2.0 | 1.0 | -0.303704 | -0.474074 | -0.459259 | -0.488889 | -0.622222 | -0.974074 | -0.474074 | -0.329630 |
13 | 3.0 | 1.0 | -0.474074 | -0.459259 | -0.488889 | -0.622222 | -0.974074 | -0.474074 | -0.329630 | -0.425926 |
14 | 4.0 | 1.0 | -0.459259 | -0.488889 | -0.622222 | -0.974074 | -0.474074 | -0.329630 | -0.425926 | -0.322222 |
15 | 5.0 | 1.0 | -0.488889 | -0.622222 | -0.974074 | -0.474074 | -0.329630 | -0.425926 | -0.322222 | -0.644444 |
16 | 0.0 | 1.0 | -0.622222 | -0.974074 | -0.474074 | -0.329630 | -0.425926 | -0.322222 | -0.644444 | -0.688889 |
17 | 1.0 | 1.0 | -0.974074 | -0.474074 | -0.329630 | -0.425926 | -0.322222 | -0.644444 | -0.688889 | -0.207407 |
18 | 2.0 | 2.0 | -0.474074 | -0.329630 | -0.425926 | -0.322222 | -0.644444 | -0.688889 | -0.207407 | -0.151852 |
19 | 3.0 | 2.0 | -0.329630 | -0.425926 | -0.322222 | -0.644444 | -0.688889 | -0.207407 | -0.151852 | -0.507407 |
20 | 4.0 | 2.0 | -0.425926 | -0.322222 | -0.644444 | -0.688889 | -0.207407 | -0.151852 | -0.507407 | -0.566667 |
21 | 5.0 | 2.0 | -0.322222 | -0.644444 | -0.688889 | -0.207407 | -0.151852 | -0.507407 | -0.566667 | -0.611111 |
22 | 0.0 | 2.0 | -0.644444 | -0.688889 | -0.207407 | -0.151852 | -0.507407 | -0.566667 | -0.611111 | -0.774074 |
23 | 1.0 | 2.0 | -0.688889 | -0.207407 | -0.151852 | -0.507407 | -0.566667 | -0.611111 | -0.774074 | -0.181481 |
24 | 2.0 | 2.0 | -0.207407 | -0.151852 | -0.507407 | -0.566667 | -0.611111 | -0.774074 | -0.181481 | -0.407407 |
25 | 3.0 | 2.0 | -0.151852 | -0.507407 | -0.566667 | -0.611111 | -0.774074 | -0.181481 | -0.407407 | -0.451852 |
26 | 4.0 | 2.0 | -0.507407 | -0.566667 | -0.611111 | -0.774074 | -0.181481 | -0.407407 | -0.451852 | -0.648148 |
27 | 5.0 | 2.0 | -0.566667 | -0.611111 | -0.774074 | -0.181481 | -0.407407 | -0.451852 | -0.648148 | -0.577778 |
28 | 0.0 | 2.0 | -0.611111 | -0.774074 | -0.181481 | -0.407407 | -0.451852 | -0.648148 | -0.577778 | -0.848148 |
29 | 1.0 | 2.0 | -0.774074 | -0.181481 | -0.407407 | -0.451852 | -0.648148 | -0.577778 | -0.848148 | -0.418519 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
566 | 0.0 | 10.0 | -0.737037 | -0.351852 | -0.488889 | -0.462963 | -0.414815 | -0.337037 | -0.555556 | -0.959259 |
567 | 1.0 | 10.0 | -0.351852 | -0.488889 | -0.462963 | -0.414815 | -0.337037 | -0.555556 | -0.959259 | -0.255556 |
568 | 2.0 | 10.0 | -0.488889 | -0.462963 | -0.414815 | -0.337037 | -0.555556 | -0.959259 | -0.255556 | -0.374074 |
569 | 3.0 | 11.0 | -0.462963 | -0.414815 | -0.337037 | -0.555556 | -0.959259 | -0.255556 | -0.374074 | -0.092593 |
570 | 4.0 | 11.0 | -0.414815 | -0.337037 | -0.555556 | -0.959259 | -0.255556 | -0.374074 | -0.092593 | -0.948148 |
571 | 5.0 | 11.0 | -0.337037 | -0.555556 | -0.959259 | -0.255556 | -0.374074 | -0.092593 | -0.948148 | -0.400000 |
572 | 0.0 | 11.0 | -0.555556 | -0.959259 | -0.255556 | -0.374074 | -0.092593 | -0.948148 | -0.400000 | -0.833333 |
573 | 1.0 | 11.0 | -0.959259 | -0.255556 | -0.374074 | -0.092593 | -0.948148 | -0.400000 | -0.833333 | -0.170370 |
574 | 2.0 | 11.0 | -0.255556 | -0.374074 | -0.092593 | -0.948148 | -0.400000 | -0.833333 | -0.170370 | -0.444444 |
575 | 3.0 | 11.0 | -0.374074 | -0.092593 | -0.948148 | -0.400000 | -0.833333 | -0.170370 | -0.444444 | -0.577778 |
576 | 4.0 | 11.0 | -0.092593 | -0.948148 | -0.400000 | -0.833333 | -0.170370 | -0.444444 | -0.577778 | -0.585185 |
577 | 5.0 | 11.0 | -0.948148 | -0.400000 | -0.833333 | -0.170370 | -0.444444 | -0.577778 | -0.585185 | -0.440741 |
578 | 0.0 | 11.0 | -0.400000 | -0.833333 | -0.170370 | -0.444444 | -0.577778 | -0.585185 | -0.440741 | -0.814815 |
579 | 1.0 | 11.0 | -0.833333 | -0.170370 | -0.444444 | -0.577778 | -0.585185 | -0.440741 | -0.814815 | -0.296296 |
580 | 2.0 | 11.0 | -0.170370 | -0.444444 | -0.577778 | -0.585185 | -0.440741 | -0.814815 | -0.296296 | -0.518519 |
581 | 3.0 | 11.0 | -0.444444 | -0.577778 | -0.585185 | -0.440741 | -0.814815 | -0.296296 | -0.518519 | -0.340741 |
582 | 4.0 | 11.0 | -0.577778 | -0.585185 | -0.440741 | -0.814815 | -0.296296 | -0.518519 | -0.340741 | -0.377778 |
583 | 5.0 | 11.0 | -0.585185 | -0.440741 | -0.814815 | -0.296296 | -0.518519 | -0.340741 | -0.377778 | -0.625926 |
584 | 0.0 | 11.0 | -0.440741 | -0.814815 | -0.296296 | -0.518519 | -0.340741 | -0.377778 | -0.625926 | -0.777778 |
585 | 1.0 | 11.0 | -0.814815 | -0.296296 | -0.518519 | -0.340741 | -0.377778 | -0.625926 | -0.777778 | -0.422222 |
586 | 2.0 | 11.0 | -0.296296 | -0.518519 | -0.340741 | -0.377778 | -0.625926 | -0.777778 | -0.422222 | -0.425926 |
587 | 3.0 | 11.0 | -0.518519 | -0.340741 | -0.377778 | -0.625926 | -0.777778 | -0.422222 | -0.425926 | -0.511111 |
588 | 4.0 | 11.0 | -0.340741 | -0.377778 | -0.625926 | -0.777778 | -0.422222 | -0.425926 | -0.511111 | -0.448148 |
589 | 5.0 | 11.0 | -0.377778 | -0.625926 | -0.777778 | -0.422222 | -0.425926 | -0.511111 | -0.448148 | -0.496296 |
590 | 6.0 | 11.0 | -0.625926 | -0.777778 | -0.422222 | -0.425926 | -0.511111 | -0.448148 | -0.496296 | -0.488889 |
591 | 0.0 | 11.0 | -0.777778 | -0.422222 | -0.425926 | -0.511111 | -0.448148 | -0.496296 | -0.488889 | -0.907407 |
592 | 1.0 | 11.0 | -0.422222 | -0.425926 | -0.511111 | -0.448148 | -0.496296 | -0.488889 | -0.907407 | -0.166667 |
593 | 2.0 | 11.0 | -0.425926 | -0.511111 | -0.448148 | -0.496296 | -0.488889 | -0.907407 | -0.166667 | -0.374074 |
594 | 3.0 | 11.0 | -0.511111 | -0.448148 | -0.496296 | -0.488889 | -0.907407 | -0.166667 | -0.374074 | -0.511111 |
595 | 4.0 | 11.0 | -0.448148 | -0.496296 | -0.488889 | -0.907407 | -0.166667 | -0.374074 | -0.511111 | -0.259259 |
596 rows × 10 columns
training_data = reordenado.drop('var1(t)',axis=1)#.values
target_data=reordenado['var1(t)']
#training_data.head()
valid_data = training_data[595-30:595]
valid_target=target_data[595-30:595]
training_data = training_data[0:595]
target_data=target_data[0:595]
print(training_data.shape,target_data.shape,valid_data.shape,valid_target.shape)
#training_data.head()
(595, 9) (595,) (30, 9) (30,)
def crear_modeloEmbeddings():
    emb_dias = 2   # embedding depth (output dimension) for the day of week
    emb_meses = 4  # embedding depth for the month
    in_dias = Input(shape=[1], name='dias')
    emb_dias = Embedding(7+1, emb_dias)(in_dias)
    in_meses = Input(shape=[1], name='meses')
    emb_meses = Embedding(12+1, emb_meses)(in_meses)
    # continuous input with the previous PASOS values; note that it is declared
    # as a model input but is not connected to the layers below: only the two
    # embeddings feed the dense layers, as the model summary confirms
    in_cli = Input(shape=[PASOS], name='cli')
    fe = concatenate([(emb_dias), (emb_meses)])
    x = Flatten()(fe)
    x = Dense(PASOS, activation='tanh')(x)
    outp = Dense(1, activation='tanh')(x)
    model = Model(inputs=[in_dias, in_meses, in_cli], outputs=outp)
    model.compile(loss='mean_absolute_error',
                  optimizer='adam',
                  metrics=['MSE'])
    model.summary()
    return model
EPOCHS=40
model = crear_modeloEmbeddings()
continuas=training_data[['var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)']]
valid_continuas=valid_data[['var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)']]
history=model.fit([training_data['weekday'],training_data['month'],continuas], target_data, epochs=EPOCHS
,validation_data=([valid_data['weekday'],valid_data['month'],valid_continuas],valid_target))
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to
==================================================================================================
dias (InputLayer)               (None, 1)            0
__________________________________________________________________________________________________
meses (InputLayer)              (None, 1)            0
__________________________________________________________________________________________________
embedding_11 (Embedding)        (None, 1, 2)         16          dias[0][0]
__________________________________________________________________________________________________
embedding_12 (Embedding)        (None, 1, 4)         52          meses[0][0]
__________________________________________________________________________________________________
concatenate_6 (Concatenate)     (None, 1, 6)         0           embedding_11[0][0]
                                                                 embedding_12[0][0]
__________________________________________________________________________________________________
flatten_6 (Flatten)             (None, 6)            0           concatenate_6[0][0]
__________________________________________________________________________________________________
dense_11 (Dense)                (None, 7)            49          flatten_6[0][0]
__________________________________________________________________________________________________
dense_12 (Dense)                (None, 1)            8           dense_11[0][0]
==================================================================================================
Total params: 125
Trainable params: 125
Non-trainable params: 0
__________________________________________________________________________________________________
Train on 595 samples, validate on 30 samples
Epoch 1/40
595/595 [==============================] - 1s 914us/step - loss: 0.3602 - mean_squared_error: 0.1793 - val_loss: 0.3692 - val_mean_squared_error: 0.1797
Epoch 2/40
595/595 [==============================] - 0s 68us/step - loss: 0.2670 - mean_squared_error: 0.1106 - val_loss: 0.2592 - val_mean_squared_error: 0.1010
Epoch 3/40
595/595 [==============================] - 0s 68us/step - loss: 0.2053 - mean_squared_error: 0.0705 - val_loss: 0.1838 - val_mean_squared_error: 0.0595
Epoch 4/40
595/595 [==============================] - 0s 68us/step - loss: 0.1800 - mean_squared_error: 0.0545 - val_loss: 0.1545 - val_mean_squared_error: 0.0451
Epoch 5/40
595/595 [==============================] - 0s 71us/step - loss: 0.1649 - mean_squared_error: 0.0462 - val_loss: 0.1440 - val_mean_squared_error: 0.0400
Epoch 6/40
595/595 [==============================] - 0s 68us/step - loss: 0.1512 - mean_squared_error: 0.0393 - val_loss: 0.1324 - val_mean_squared_error: 0.0340
Epoch 7/40
595/595 [==============================] - 0s 69us/step - loss: 0.1405 - mean_squared_error: 0.0341 - val_loss: 0.1243 - val_mean_squared_error: 0.0301
Epoch 8/40
595/595 [==============================] - 0s 68us/step - loss: 0.1301 - mean_squared_error: 0.0295 - val_loss: 0.1146 - val_mean_squared_error: 0.0258
Epoch 9/40
595/595 [==============================] - 0s 69us/step - loss: 0.1225 - mean_squared_error: 0.0268 - val_loss: 0.1068 - val_mean_squared_error: 0.0226
Epoch 10/40
595/595 [==============================] - 0s 65us/step - loss: 0.1163 - mean_squared_error: 0.0248 - val_loss: 0.1024 - val_mean_squared_error: 0.0215
Epoch 11/40
595/595 [==============================] - 0s 71us/step - loss: 0.1120 - mean_squared_error: 0.0234 - val_loss: 0.0986 - val_mean_squared_error: 0.0209
Epoch 12/40
595/595 [==============================] - 0s 68us/step - loss: 0.1099 - mean_squared_error: 0.0228 - val_loss: 0.0964 - val_mean_squared_error: 0.0200
Epoch 13/40
595/595 [==============================] - 0s 69us/step - loss: 0.1087 - mean_squared_error: 0.0226 - val_loss: 0.0960 - val_mean_squared_error: 0.0201
Epoch 14/40
595/595 [==============================] - 0s 68us/step - loss: 0.1080 - mean_squared_error: 0.0224 - val_loss: 0.0945 - val_mean_squared_error: 0.0199
Epoch 15/40
595/595 [==============================] - 0s 69us/step - loss: 0.1082 - mean_squared_error: 0.0226 - val_loss: 0.0944 - val_mean_squared_error: 0.0200
Epoch 16/40
595/595 [==============================] - 0s 67us/step - loss: 0.1079 - mean_squared_error: 0.0225 - val_loss: 0.0952 - val_mean_squared_error: 0.0199
Epoch 17/40
595/595 [==============================] - 0s 65us/step - loss: 0.1077 - mean_squared_error: 0.0223 - val_loss: 0.0938 - val_mean_squared_error: 0.0197
Epoch 18/40
595/595 [==============================] - 0s 61us/step - loss: 0.1078 - mean_squared_error: 0.0223 - val_loss: 0.0960 - val_mean_squared_error: 0.0201
Epoch 19/40
595/595 [==============================] - 0s 65us/step - loss: 0.1076 - mean_squared_error: 0.0224 - val_loss: 0.0956 - val_mean_squared_error: 0.0200
Epoch 20/40
595/595 [==============================] - 0s 61us/step - loss: 0.1079 - mean_squared_error: 0.0226 - val_loss: 0.0958 - val_mean_squared_error: 0.0201
Epoch 21/40
595/595 [==============================] - 0s 69us/step - loss: 0.1077 - mean_squared_error: 0.0223 - val_loss: 0.0938 - val_mean_squared_error: 0.0197
Epoch 22/40
595/595 [==============================] - 0s 70us/step - loss: 0.1080 - mean_squared_error: 0.0225 - val_loss: 0.0953 - val_mean_squared_error: 0.0197
Epoch 23/40
595/595 [==============================] - 0s 63us/step - loss: 0.1079 - mean_squared_error: 0.0224 - val_loss: 0.0934 - val_mean_squared_error: 0.0194
Epoch 24/40
595/595 [==============================] - 0s 72us/step - loss: 0.1078 - mean_squared_error: 0.0224 - val_loss: 0.0942 - val_mean_squared_error: 0.0193
Epoch 25/40
595/595 [==============================] - 0s 59us/step - loss: 0.1078 - mean_squared_error: 0.0223 - val_loss: 0.0964 - val_mean_squared_error: 0.0199
Epoch 26/40
595/595 [==============================] - 0s 60us/step - loss: 0.1080 - mean_squared_error: 0.0226 - val_loss: 0.0942 - val_mean_squared_error: 0.0196
Epoch 27/40
595/595 [==============================] - 0s 67us/step - loss: 0.1081 - mean_squared_error: 0.0224 - val_loss: 0.0956 - val_mean_squared_error: 0.0201
Epoch 28/40
595/595 [==============================] - 0s 62us/step - loss: 0.1076 - mean_squared_error: 0.0224 - val_loss: 0.0938 - val_mean_squared_error: 0.0197
Epoch 29/40
595/595 [==============================] - 0s 89us/step - loss: 0.1076 - mean_squared_error: 0.0223 - val_loss: 0.0945 - val_mean_squared_error: 0.0199
Epoch 30/40
595/595 [==============================] - 0s 118us/step - loss: 0.1077 - mean_squared_error: 0.0223 - val_loss: 0.0948 - val_mean_squared_error: 0.0197
Epoch 31/40
595/595 [==============================] - 0s 72us/step - loss: 0.1079 - mean_squared_error: 0.0226 - val_loss: 0.0938 - val_mean_squared_error: 0.0195
Epoch 32/40
595/595 [==============================] - 0s 70us/step - loss: 0.1080 - mean_squared_error: 0.0225 - val_loss: 0.0950 - val_mean_squared_error: 0.0197
Epoch 33/40
595/595 [==============================] - 0s 60us/step - loss: 0.1077 - mean_squared_error: 0.0224 - val_loss: 0.0922 - val_mean_squared_error: 0.0194
Epoch 34/40
595/595 [==============================] - 0s 62us/step - loss: 0.1078 - mean_squared_error: 0.0224 - val_loss: 0.0942 - val_mean_squared_error: 0.0197
Epoch 35/40
595/595 [==============================] - 0s 62us/step - loss: 0.1076 - mean_squared_error: 0.0225 - val_loss: 0.0949 - val_mean_squared_error: 0.0198
Epoch 36/40
595/595 [==============================] - 0s 64us/step - loss: 0.1078 - mean_squared_error: 0.0225 - val_loss: 0.0947 - val_mean_squared_error: 0.0199
Epoch 37/40
595/595 [==============================] - 0s 61us/step - loss: 0.1077 - mean_squared_error: 0.0225 - val_loss: 0.0960 - val_mean_squared_error: 0.0201
Epoch 38/40
595/595 [==============================] - 0s 59us/step - loss: 0.1076 - mean_squared_error: 0.0224 - val_loss: 0.0964 - val_mean_squared_error: 0.0201
Epoch 39/40
595/595 [==============================] - 0s 61us/step - loss: 0.1078 - mean_squared_error: 0.0225 - val_loss: 0.0953 - val_mean_squared_error: 0.0199
Epoch 40/40
595/595 [==============================] - 0s 64us/step - loss: 0.1080 - mean_squared_error: 0.0224 - val_loss: 0.0964 - val_mean_squared_error: 0.0204
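With training done, the learned embeddings can be inspected. Below is a minimal sketch (not in the original notebook, assuming the `model` trained above) that pulls the weight matrix of the first Embedding layer, which per the summary is the one attached to the `dias` input, i.e. one 2-dimensional vector per day-of-week index:

# Sketch: inspect the learned day-of-week embedding (assumes `model` from above).
capas_embedding = [capa for capa in model.layers if isinstance(capa, Embedding)]
pesos_dias = capas_embedding[0].get_weights()[0]  # matrix of shape (8, 2): one row per weekday index
print(pesos_dias)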
results=model.predict([valid_data['weekday'],valid_data['month'],valid_continuas])
print( len(results) )
plt.scatter(range(len(valid_target)),valid_target,c='g')
plt.scatter(range(len(results)),results,c='r')
plt.title('validate')
plt.show()
30
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('loss vs validation loss')
plt.show()

plt.title('MSE')
plt.plot(history.history['mean_squared_error'])
plt.show()
compara = pd.DataFrame(np.array([valid_target, [x[0] for x in results]])).transpose()
compara.columns = ['real', 'prediccion']
inverted = scaler.inverse_transform(compara.values)
compara2 = pd.DataFrame(inverted)
compara2.columns = ['real', 'prediccion']
compara2['diferencia'] = compara2['real'] - compara2['prediccion']
compara2.head(12)
| | real | prediccion | diferencia |
---|---|---|---|
0 | 171.000000 | 180.597778 | -9.597778 |
1 | 62.000004 | 107.018166 | -45.018162 |
2 | 252.000000 | 266.338470 | -14.338470 |
3 | 220.000000 | 234.285080 | -14.285080 |
4 | 296.000000 | 197.776123 | 98.223877 |
5 | 64.999992 | 204.157654 | -139.157654 |
6 | 213.000000 | 197.658493 | 15.341507 |
7 | 95.999992 | 100.203423 | -4.203430 |
8 | 275.000000 | 247.119278 | 27.880722 |
9 | 201.000000 | 216.487488 | -15.487488 |
10 | 165.000000 | 197.776123 | -32.776123 |
11 | 163.000000 | 204.157654 | -41.157654 |
compara2.describe()
| | real | prediccion | diferencia |
---|---|---|---|
count | 30.000000 | 30.000000 | 30.000000 |
mean | 184.733337 | 193.536270 | -8.802909 |
std | 61.144787 | 47.120979 | 38.144459 |
min | 62.000004 | 100.203423 | -139.157654 |
25% | 163.500000 | 197.658493 | -22.024433 |
50% | 194.500000 | 197.776123 | -10.042633 |
75% | 219.750000 | 216.487488 | 9.182810 |
max | 296.000000 | 266.338470 | 98.223877 |
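As a quick follow-up (a sketch that is not in the original notebook, reusing the `compara2` DataFrame built above), the comparison table also makes it easy to summarize the validation error in single figures such as the mean absolute error in units sold:

# Sketch: aggregate error metrics computed from the comparison table above.
mae = compara2['diferencia'].abs().mean()                               # mean absolute error, in unidades
mape = (compara2['diferencia'].abs() / compara2['real']).mean() * 100   # mean absolute percentage error
print('MAE: %.2f unidades - MAPE: %.1f%%' % (mae, mape))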
compara2['real'].plot()
compara2['prediccion'].plot()
Starting from the last week of November 2018, we will try to predict the first week of December, forecasting one day at a time and feeding each prediction back in as input for the following day.
ultimosDias = df['2018-11-16':'2018-11-30']
ultimosDias
fecha | unidades | weekday | month |
---|---|---|---|
2018-11-16 | 152 | 4 | 11 |
2018-11-17 | 111 | 5 | 11 |
2018-11-19 | 207 | 0 | 11 |
2018-11-20 | 206 | 1 | 11 |
2018-11-21 | 183 | 2 | 11 |
2018-11-22 | 200 | 3 | 11 |
2018-11-23 | 187 | 4 | 11 |
2018-11-24 | 189 | 5 | 11 |
2018-11-25 | 76 | 6 | 11 |
2018-11-26 | 276 | 0 | 11 |
2018-11-27 | 220 | 1 | 11 |
2018-11-28 | 183 | 2 | 11 |
2018-11-29 | 251 | 3 | 11 |
2018-11-30 | 189 | 4 | 11 |
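Conceptually, the forecast built in the next cells is a rolling one-step-ahead loop: predict the next day, slide the 7-value window forward by appending that prediction, and repeat seven times. The following minimal sketch illustrates only the idea; `predecir_un_dia` is a hypothetical stand-in for the trained model's predict call, defined here as a dummy so the sketch runs on its own:

# Sketch of the rolling-forecast idea (dummy stand-in, not the real model).
def predecir_un_dia(ventana):
    # hypothetical stand-in for model.predict(...): just averages the window
    return sum(ventana) / len(ventana)

ventana = [0.24, 0.11, 0.13, -1.0, 1.0, 0.44, 0.07]  # last 7 scaled values of November
predicciones = []
for _ in range(7):
    siguiente = predecir_un_dia(ventana)
    predicciones.append(siguiente)
    ventana = ventana[1:] + [siguiente]               # slide the window forward one day
print(predicciones)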
# load dataset
values = ultimosDias['unidades'].values
# ensure all data is float
values = values.astype('float32')
# normalize features
#scaler = MinMaxScaler(feature_range=(-1, 1))
values=values.reshape(-1, 1) # reshape to a single column because we have only one feature
scaled = scaler.fit_transform(values)
reframed = series_to_supervised(scaled, PASOS, 1)
reframed.reset_index(inplace=True, drop=True)
contador=0
reframed['weekday']=ultimosDias['weekday']  # create the columns; values are filled in the loop below
reframed['month']=ultimosDias['month']
for i in range(reframed.index[0], reframed.index[-1]):
    reframed.loc[contador, 'weekday'] = ultimosDias['weekday'].iloc[i+8]
    reframed.loc[contador, 'month'] = ultimosDias['month'].iloc[i+8]
    contador = contador + 1
reframed.head()
reordenado = reframed[ ['weekday','month','var1(t-7)','var1(t-6)','var1(t-5)','var1(t-4)','var1(t-3)','var1(t-2)','var1(t-1)'] ].copy()
reordenado.dropna(inplace=True)
reordenado
| | weekday | month | var1(t-7) | var1(t-6) | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) |
---|---|---|---|---|---|---|---|---|---|
0 | 6.0 | 11.0 | -0.24 | -0.65 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 |
1 | 0.0 | 11.0 | -0.65 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 |
2 | 1.0 | 11.0 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 |
3 | 2.0 | 11.0 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 |
4 | 3.0 | 11.0 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 | 0.44 |
5 | 4.0 | 11.0 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 | 0.44 | 0.07 |
values = reordenado.values
x_test = values[5:, :]
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))
print(x_test.shape)
print(x_test)
ultDiaSemana = reordenado.weekday[len(reordenado.index)-1]
ultDiaSemana
(1, 1, 9)
[[[ 4. 11. 0.24000001 0.11000001 0.13 -1. 1. 0.44000006 0.06999993]]]
4.0
def agregarNuevoValor(x_test, nuevoValor, ultDiaSemana):
    # shift the window of past values one position to the left
    for i in range(x_test.shape[2]-3):
        x_test[0][0][i+2] = x_test[0][0][i+3]
    # advance the day of the week (0=Monday ... 6=Sunday)
    ultDiaSemana = ultDiaSemana + 1
    if ultDiaSemana > 6:
        ultDiaSemana = 0
    x_test[0][0][0] = ultDiaSemana
    x_test[0][0][1] = 12  # the forecasted week falls in December
    # place the new prediction as the most recent value of the window
    x_test[0][0][x_test.shape[2]-1] = nuevoValor
    return x_test, ultDiaSemana
results = []
for i in range(7):
    dia = np.array([x_test[0][0][0]])
    mes = np.array([x_test[0][0][1]])
    valores = np.array([x_test[0][0][2:9]])
    parcial = model.predict([dia, mes, valores])
    results.append(parcial[0])
    print('pred', i, x_test)
    x_test, ultDiaSemana = agregarNuevoValor(x_test, parcial[0], ultDiaSemana)
pred 0 [[[ 4. 11. 0.24000001 0.11000001 0.13 -1. 1. 0.44000006 0.06999993]]]
pred 1 [[[ 5. 12. 0.11000001 0.13 -1. 1. 0.44000006 0.06999993 -0.43274945]]]
pred 2 [[[ 6. 12. 0.13 -1. 1. 0.44000006 0.06999993 -0.43274945 -0.32928693]]]
pred 3 [[[ 0. 12. -1. 1. 0.44000006 0.06999993 -0.43274945 -0.32928693 -0.46238086]]]
pred 4 [[[ 1. 12. 1. 0.44000006 0.06999993 -0.43274945 -0.32928693 -0.46238086 -0.76184267]]]
pred 5 [[[ 2. 12. 0.44000006 0.06999993 -0.43274945 -0.32928693 -0.46238086 -0.76184267 -0.12789321]]]
pred 6 [[[ 3. 12. 0.06999993 -0.43274945 -0.32928693 -0.46238086 -0.76184267 -0.12789321 -0.25119212]]]
adimen = [x for x in results]
print(adimen)
inverted = scaler.inverse_transform(adimen)
inverted
[array([-0.43274945], dtype=float32), array([-0.32928693], dtype=float32), array([-0.46238086], dtype=float32), array([-0.76184267], dtype=float32), array([-0.12789321], dtype=float32), array([-0.25119212], dtype=float32), array([-0.32878956], dtype=float32)]
array([[132.72505699], [143.07130895], [129.76191632], [ 99.81573447], [163.21068175], [150.88079015], [143.12104605]])
prediccion1SemanaDiciembre = pd.DataFrame(inverted)
prediccion1SemanaDiciembre.columns = ['pronostico']
prediccion1SemanaDiciembre.plot()
prediccion1SemanaDiciembre.to_csv('pronostico_embeddings.csv')
prediccion1SemanaDiciembre
| | pronostico |
---|---|
0 | 132.725057 |
1 | 143.071309 |
2 | 129.761916 |
3 | 99.815734 |
4 | 163.210682 |
5 | 150.880790 |
6 | 143.121046 |
The complete article at www.aprendemachinelearning.com