El artículo completo con la explicación detallada en el blog: http://www.aprendemachinelearning.com/
Importamos las librerías que vamos a utilizar
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('fast')
from keras.models import Sequential
from keras.layers import Dense,Activation,Flatten
from sklearn.preprocessing import MinMaxScaler
Using TensorFlow backend.
# Load the daily series: the first CSV column is parsed as dates and used as
# the index ('fecha'); the second column holds the values ('unidades').
df = pd.read_csv('time_series.csv', parse_dates=[0], header=None, index_col=0, names=['fecha', 'unidades'])
# The `squeeze` keyword of read_csv was deprecated in pandas 1.4 and removed
# in 2.0; squeezing the single-column frame afterwards yields the same Series.
df = df.squeeze('columns')
df.head()
fecha 2017-01-02 236 2017-01-03 237 2017-01-04 290 2017-01-05 221 2017-01-07 128 Name: unidades, dtype: int64
# Summary statistics (count, mean, std, quartiles, min/max) of the series.
df.describe()
count 604.000000 mean 215.935430 std 75.050304 min 51.000000 25% 171.000000 50% 214.000000 75% 261.250000 max 591.000000 Name: unidades, dtype: float64
# Earliest and latest timestamps present in the index.
print(df.index.min())
print(df.index.max())
2017-01-02 00:00:00 2018-11-30 00:00:00
# Number of observations per calendar year, selected through explicit
# label-based partial-date indexing.
print(len(df.loc['2017']))
print(len(df.loc['2018']))
315 289
# Average the series per month ('M' groups by month, labelled at month end).
meses =df.resample('M').mean()
meses
fecha 2017-01-31 203.923077 2017-02-28 184.666667 2017-03-31 182.964286 2017-04-30 198.960000 2017-05-31 201.185185 2017-06-30 209.518519 2017-07-31 278.923077 2017-08-31 316.000000 2017-09-30 222.925926 2017-10-31 207.851852 2017-11-30 185.925926 2017-12-31 213.200000 2018-01-31 201.384615 2018-02-28 190.625000 2018-03-31 174.846154 2018-04-30 186.000000 2018-05-31 190.666667 2018-06-30 196.037037 2018-07-31 289.500000 2018-08-31 309.038462 2018-09-30 230.518519 2018-10-31 209.444444 2018-11-30 184.481481 Freq: M, Name: unidades, dtype: float64
# Draw the monthly means of 2017 and 2018 on the same axes to compare years.
plt.plot(meses['2017'].values)
plt.plot(meses['2018'].values)
[<matplotlib.lines.Line2D at 0x1a30ec5860>]
# Compare the daily values from June 1st to September 1st of each year.
verano2017 = df['2017-06-01':'2017-09-01']
plt.plot(verano2017.values)
verano2018 = df['2018-06-01':'2018-09-01']
plt.plot(verano2018.values)
[<matplotlib.lines.Line2D at 0x1a31776470>]
# Number of previous observations fed to the network for each prediction
# (used as the lag count when framing the series and as the input width).
PASOS=7
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Every row pairs the ``n_in`` previous observations (columns
    ``varK(t-n_in)`` .. ``varK(t-1)``) with the current and the following
    ``n_out - 1`` observations (columns ``varK(t)``, ``varK(t+1)``, ...).

    Args:
        data: Observations in any form ``pd.DataFrame`` accepts: a flat list,
            a list of rows, a 1-D or 2-D array, or a Series.
        n_in: Number of lagged steps used as inputs.
        n_out: Number of steps to forecast, starting at ``t``.
        dropnan: When true, drop the rows left incomplete by the shifting.

    Returns:
        A DataFrame with ``(n_in + n_out) * n_vars`` named columns.
    """
    frame = pd.DataFrame(data)
    # Count the variables on the frame itself rather than on the raw input:
    # `data.shape[1]` fails for 1-D arrays/Series, and assuming one variable
    # for every list mislabels lists of multi-column rows.
    n_vars = frame.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(frame.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(frame.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # the shifts leave NaN at the edges; drop those incomplete rows on request
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# Take the raw observations as a float32 column vector; the scaler expects a
# 2-D (n_samples, 1) input because there is a single variable.
values = df.values.astype('float32').reshape(-1, 1)
# Rescale the observations into the [-1, 1] interval.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(values)
# Build the supervised table: PASOS lagged inputs plus the value to predict.
reframed = series_to_supervised(scaled, PASOS, 1)
reframed.head()
var1(t-7) | var1(t-6) | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | var1(t) | |
---|---|---|---|---|---|---|---|---|
7 | -0.314815 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 |
8 | -0.311111 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 |
9 | -0.114815 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 |
10 | -0.370370 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 |
11 | -0.714815 | -0.103704 | -0.225926 | -0.433333 | -0.607407 | -0.522222 | -0.644444 | -0.344444 |
# Split into train and validation sets.
values = reframed.values
# Hold out the last 30 rows for validation. The split point is derived from
# the framed data instead of the hard-coded yearly counts (315 + 289), so it
# stays correct if the CSV or PASOS changes; for the current data it is the
# same 567 / 30 split.
n_val_days = 30
n_train_days = len(values) - n_val_days
train = values[:n_train_days, :]
test = values[n_train_days:, :]
# The last column is the target; every other column is a lagged input.
x_train, y_train = train[:, :-1], train[:, -1]
x_val, y_val = test[:, :-1], test[:, -1]
# Reshape inputs to 3-D [samples, timesteps, features].
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
x_val = x_val.reshape((x_val.shape[0], 1, x_val.shape[1]))
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
(567, 1, 7) (567,) (30, 1, 7) (30,)
def crear_modeloFF():
    """Build, compile and summarize the feed-forward forecasting network.

    The network receives windows shaped (1, PASOS), applies a tanh Dense
    layer with PASOS units, flattens the result and produces a single tanh
    output. It is compiled with mean absolute error as the loss, the Adam
    optimizer and MSE as an additional metric. The layer summary is printed
    before the model is returned.

    Returns:
        The compiled Sequential model.
    """
    red = Sequential()
    capas = (
        Dense(PASOS, input_shape=(1, PASOS), activation='tanh'),
        Flatten(),
        Dense(1, activation='tanh'),
    )
    for capa in capas:
        red.add(capa)
    red.compile(loss='mean_absolute_error', optimizer='Adam', metrics=["mse"])
    red.summary()
    return red
# Number of passes over the training set.
EPOCHS=40
model = crear_modeloFF()
# Fit on the training windows, evaluating on the held-out validation windows
# after every epoch; the returned history keeps the per-epoch metrics.
history=model.fit(x_train,y_train,epochs=EPOCHS,validation_data=(x_val,y_val),batch_size=PASOS)
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_1 (Dense) (None, 1, 7) 56 _________________________________________________________________ flatten_1 (Flatten) (None, 7) 0 _________________________________________________________________ dense_2 (Dense) (None, 1) 8 ================================================================= Total params: 64 Trainable params: 64 Non-trainable params: 0 _________________________________________________________________ Train on 567 samples, validate on 30 samples Epoch 1/40 567/567 [==============================] - 0s 795us/step - loss: 0.3436 - mean_squared_error: 0.1881 - val_loss: 0.1672 - val_mean_squared_error: 0.0444 Epoch 2/40 567/567 [==============================] - 0s 255us/step - loss: 0.2224 - mean_squared_error: 0.0793 - val_loss: 0.1621 - val_mean_squared_error: 0.0426 Epoch 3/40 567/567 [==============================] - 0s 248us/step - loss: 0.2096 - mean_squared_error: 0.0719 - val_loss: 0.1569 - val_mean_squared_error: 0.0411 Epoch 4/40 567/567 [==============================] - 0s 241us/step - loss: 0.1984 - mean_squared_error: 0.0660 - val_loss: 0.1562 - val_mean_squared_error: 0.0367 Epoch 5/40 567/567 [==============================] - 0s 252us/step - loss: 0.1895 - mean_squared_error: 0.0618 - val_loss: 0.1506 - val_mean_squared_error: 0.0374 Epoch 6/40 567/567 [==============================] - 0s 253us/step - loss: 0.1827 - mean_squared_error: 0.0589 - val_loss: 0.1480 - val_mean_squared_error: 0.0389 Epoch 7/40 567/567 [==============================] - 0s 307us/step - loss: 0.1775 - mean_squared_error: 0.0575 - val_loss: 0.1518 - val_mean_squared_error: 0.0361 Epoch 8/40 567/567 [==============================] - 0s 273us/step - loss: 0.1758 - mean_squared_error: 0.0565 - val_loss: 0.1469 - val_mean_squared_error: 0.0388 Epoch 9/40 567/567 [==============================] - 0s 
254us/step - loss: 0.1730 - mean_squared_error: 0.0556 - val_loss: 0.1411 - val_mean_squared_error: 0.0360 Epoch 10/40 567/567 [==============================] - 0s 278us/step - loss: 0.1719 - mean_squared_error: 0.0554 - val_loss: 0.1401 - val_mean_squared_error: 0.0360 Epoch 11/40 567/567 [==============================] - 0s 263us/step - loss: 0.1711 - mean_squared_error: 0.0552 - val_loss: 0.1415 - val_mean_squared_error: 0.0368 Epoch 12/40 567/567 [==============================] - 0s 275us/step - loss: 0.1705 - mean_squared_error: 0.0563 - val_loss: 0.1392 - val_mean_squared_error: 0.0350 Epoch 13/40 567/567 [==============================] - 0s 275us/step - loss: 0.1704 - mean_squared_error: 0.0560 - val_loss: 0.1387 - val_mean_squared_error: 0.0356 Epoch 14/40 567/567 [==============================] - 0s 336us/step - loss: 0.1701 - mean_squared_error: 0.0560 - val_loss: 0.1397 - val_mean_squared_error: 0.0356 Epoch 15/40 567/567 [==============================] - 0s 428us/step - loss: 0.1703 - mean_squared_error: 0.0564 - val_loss: 0.1393 - val_mean_squared_error: 0.0353 Epoch 16/40 567/567 [==============================] - 0s 498us/step - loss: 0.1712 - mean_squared_error: 0.0564 - val_loss: 0.1398 - val_mean_squared_error: 0.0359 Epoch 17/40 567/567 [==============================] - 0s 458us/step - loss: 0.1692 - mean_squared_error: 0.0557 - val_loss: 0.1397 - val_mean_squared_error: 0.0352 Epoch 18/40 567/567 [==============================] - 0s 316us/step - loss: 0.1691 - mean_squared_error: 0.0556 - val_loss: 0.1408 - val_mean_squared_error: 0.0348 Epoch 19/40 567/567 [==============================] - 0s 273us/step - loss: 0.1684 - mean_squared_error: 0.0557 - val_loss: 0.1438 - val_mean_squared_error: 0.0378 Epoch 20/40 567/567 [==============================] - 0s 447us/step - loss: 0.1691 - mean_squared_error: 0.0555 - val_loss: 0.1394 - val_mean_squared_error: 0.0352 Epoch 21/40 567/567 [==============================] - 0s 554us/step - loss: 
0.1687 - mean_squared_error: 0.0559 - val_loss: 0.1407 - val_mean_squared_error: 0.0350 Epoch 22/40 567/567 [==============================] - 0s 518us/step - loss: 0.1688 - mean_squared_error: 0.0559 - val_loss: 0.1409 - val_mean_squared_error: 0.0362 Epoch 23/40 567/567 [==============================] - 0s 334us/step - loss: 0.1689 - mean_squared_error: 0.0557 - val_loss: 0.1399 - val_mean_squared_error: 0.0356 Epoch 24/40 567/567 [==============================] - 0s 269us/step - loss: 0.1689 - mean_squared_error: 0.0558 - val_loss: 0.1412 - val_mean_squared_error: 0.0364 Epoch 25/40 567/567 [==============================] - 0s 259us/step - loss: 0.1690 - mean_squared_error: 0.0559 - val_loss: 0.1403 - val_mean_squared_error: 0.0350 Epoch 26/40 567/567 [==============================] - 0s 321us/step - loss: 0.1685 - mean_squared_error: 0.0553 - val_loss: 0.1402 - val_mean_squared_error: 0.0358 Epoch 27/40 567/567 [==============================] - 0s 313us/step - loss: 0.1686 - mean_squared_error: 0.0557 - val_loss: 0.1398 - val_mean_squared_error: 0.0352 Epoch 28/40 567/567 [==============================] - 0s 273us/step - loss: 0.1693 - mean_squared_error: 0.0555 - val_loss: 0.1409 - val_mean_squared_error: 0.0357 Epoch 29/40 567/567 [==============================] - 0s 347us/step - loss: 0.1694 - mean_squared_error: 0.0559 - val_loss: 0.1404 - val_mean_squared_error: 0.0359 Epoch 30/40 567/567 [==============================] - 0s 322us/step - loss: 0.1691 - mean_squared_error: 0.0557 - val_loss: 0.1413 - val_mean_squared_error: 0.0348 Epoch 31/40 567/567 [==============================] - 0s 322us/step - loss: 0.1712 - mean_squared_error: 0.0576 - val_loss: 0.1408 - val_mean_squared_error: 0.0362 Epoch 32/40 567/567 [==============================] - 0s 335us/step - loss: 0.1689 - mean_squared_error: 0.0554 - val_loss: 0.1418 - val_mean_squared_error: 0.0348 Epoch 33/40 567/567 [==============================] - 0s 331us/step - loss: 0.1680 - 
mean_squared_error: 0.0553 - val_loss: 0.1435 - val_mean_squared_error: 0.0353 Epoch 34/40 567/567 [==============================] - 0s 336us/step - loss: 0.1688 - mean_squared_error: 0.0555 - val_loss: 0.1409 - val_mean_squared_error: 0.0360 Epoch 35/40 567/567 [==============================] - 0s 331us/step - loss: 0.1681 - mean_squared_error: 0.0558 - val_loss: 0.1401 - val_mean_squared_error: 0.0350 Epoch 36/40 567/567 [==============================] - 0s 313us/step - loss: 0.1679 - mean_squared_error: 0.0551 - val_loss: 0.1424 - val_mean_squared_error: 0.0351 Epoch 37/40 567/567 [==============================] - 0s 321us/step - loss: 0.1682 - mean_squared_error: 0.0553 - val_loss: 0.1406 - val_mean_squared_error: 0.0351 Epoch 38/40 567/567 [==============================] - 0s 339us/step - loss: 0.1684 - mean_squared_error: 0.0558 - val_loss: 0.1403 - val_mean_squared_error: 0.0353 Epoch 39/40 567/567 [==============================] - 0s 336us/step - loss: 0.1683 - mean_squared_error: 0.0555 - val_loss: 0.1405 - val_mean_squared_error: 0.0353 Epoch 40/40 567/567 [==============================] - 0s 342us/step - loss: 0.1682 - mean_squared_error: 0.0555 - val_loss: 0.1402 - val_mean_squared_error: 0.0353
# Predict the validation windows and plot the actual values (green) against
# the predictions (red); both are still on the scaled range here.
results=model.predict(x_val)
print( len(results) )
plt.scatter(range(len(y_val)),y_val,c='g')
plt.scatter(range(len(results)),results,c='r')
plt.title('validate')
plt.show()
30
# Training and validation loss share one figure. A figure has a single title
# (the second plt.title call used to overwrite the first), so the curves are
# told apart with a legend instead.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='validate loss')
plt.title('loss')
plt.legend()
plt.show()
# The tracked metric is the mean squared error, not an accuracy, so the plot
# is titled accordingly. Keras versions store it under different keys
# ('mean_squared_error' in older releases, 'mse' in newer ones).
mse_key = 'mean_squared_error' if 'mean_squared_error' in history.history else 'mse'
plt.title('Mean squared error')
plt.plot(history.history[mse_key])
plt.show()
# Put the scaled validation targets and the scaled predictions side by side.
compara = pd.DataFrame(
    np.column_stack([y_val, np.asarray(results)[:, 0]]),
    columns=['real', 'prediccion'],
)
# The scaler was fitted on a single feature, so undo the scaling one column at
# a time instead of handing it a two-column matrix and relying on implicit
# broadcasting of its one-feature parameters.
inverted = np.column_stack([
    scaler.inverse_transform(compara[[columna]].values).ravel()
    for columna in compara.columns
])
compara2 = pd.DataFrame(inverted, columns=['real', 'prediccion'])
# Signed error in original units: positive when the model under-predicts.
compara2['diferencia'] = compara2['real'] - compara2['prediccion']
compara2.head()
(30, 1, 7) (30,) 30
real | prediccion | diferencia | |
---|---|---|---|
0 | 252.000000 | 214.163651 | 37.836349 |
1 | 220.000000 | 203.868866 | 16.131134 |
2 | 296.000000 | 198.812180 | 97.187820 |
3 | 64.999992 | 194.746170 | -129.746185 |
4 | 213.000000 | 196.406677 | 16.593323 |
# Summary statistics of actual values, predictions and their difference.
compara2.describe()
real | prediccion | diferencia | |
---|---|---|---|
count | 30.000000 | 30.000000 | 30.000000 |
mean | 191.633331 | 196.321518 | -4.688193 |
std | 57.580818 | 23.251623 | 51.338474 |
min | 64.999992 | 150.876389 | -129.746185 |
25% | 169.000000 | 182.479301 | -24.522148 |
50% | 200.500000 | 197.162064 | 1.159111 |
75% | 220.000000 | 207.022148 | 26.922920 |
max | 296.000000 | 241.253464 | 97.187820 |
# Plot actual values and predictions (original units) on the same axes.
compara2['real'].plot()
compara2['prediccion'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1a31d9f0f0>
A partir de la última semana de noviembre 2018, intentaremos predecir la primera semana de diciembre.
# Select the observations dated 2018-11-16 through 2018-11-30; they seed the
# forecast that follows.
ultimosDias = df['2018-11-16':'2018-11-30']
ultimosDias
fecha 2018-11-16 152 2018-11-17 111 2018-11-19 207 2018-11-20 206 2018-11-21 183 2018-11-22 200 2018-11-23 187 2018-11-24 189 2018-11-25 76 2018-11-26 276 2018-11-27 220 2018-11-28 183 2018-11-29 251 2018-11-30 189 Name: unidades, dtype: int64
values = ultimosDias.values
values = values.astype('float32')
# The scaler expects a 2-D (n_samples, 1) input for the single variable.
values = values.reshape(-1, 1)
# Reuse the scaler exactly as it was fitted on the full series. Refitting it
# here (fit_transform) would map this short window's own min/max to [-1, 1],
# feeding the network inputs on a different scale than it was trained on and
# making the later inverse_transform de-normalize with the wrong range.
scaled = scaler.transform(values)
reframed = series_to_supervised(scaled, PASOS, 1)
# Drop the target column var1(t), which is always the last one; only the
# lagged inputs are needed for forecasting.
reframed.drop(reframed.columns[-1], axis=1, inplace=True)
reframed.head(7)
var1(t-7) | var1(t-6) | var1(t-5) | var1(t-4) | var1(t-3) | var1(t-2) | var1(t-1) | |
---|---|---|---|---|---|---|---|
7 | -0.24 | -0.65 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 |
8 | -0.65 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 |
9 | 0.31 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 |
10 | 0.30 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 |
11 | 0.07 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 | 0.44 |
12 | 0.24 | 0.11 | 0.13 | -1.00 | 1.00 | 0.44 | 0.07 |
13 | 0.11 | 0.13 | -1.00 | 1.00 | 0.44 | 0.07 | 0.75 |
values = reframed.values
# The last row of `reframed` holds lags t-7..t-1 of the final observation, so
# it stops one day short of the end of the history and the first "forecast"
# would target a day that is already known. To forecast the day after the
# last observation, seed the window with the last PASOS scaled observations
# themselves. np.array copies, so the in-place window updates made while
# forecasting do not touch `scaled`.
x_test = np.array(scaled[-PASOS:], dtype='float32').reshape((1, 1, PASOS))
print(x_test.shape)
x_test
(1, 1, 7)
array([[[ 0.11000001, 0.13 , -1. , 1. , 0.44000006, 0.06999993, 0.75 ]]], dtype=float32)
def agregarNuevoValor(x_test, nuevoValor):
    """Slide the input window one step and append a new value, in place.

    Only the window at ``x_test[0][0]`` is modified: its elements move one
    position to the left (the oldest is discarded) and ``nuevoValor`` is
    written into the last slot.

    Args:
        x_test: 3-D NumPy array; its last axis is the window.
        nuevoValor: Value to append, given as a scalar or a size-1 array
            (for example one row of a model prediction).

    Returns:
        The same ``x_test`` array, after being updated.

    Raises:
        ValueError: If ``nuevoValor`` holds more than one element.
    """
    # Extract the scalar explicitly: storing a shape-(1,) array into a single
    # array slot is deprecated since NumPy 1.25.
    valor = np.asarray(nuevoValor).item()
    ventana = x_test[0][0]
    # Copy the source slice so the shift never reads elements it has already
    # overwritten.
    ventana[:-1] = ventana[1:].copy()
    ventana[-1] = valor
    return x_test
# Forecast seven steps ahead, one at a time: each prediction is stored and
# then fed back into the window as the newest input for the next step.
results=[]
for i in range(7):
    parcial=model.predict(x_test)
    results.append(parcial[0])
    # Show the window that produced this step's prediction.
    print(x_test)
    x_test=agregarNuevoValor(x_test,parcial[0])
[[[ 0.11000001 0.13 -1. 1. 0.44000006 0.06999993 0.75 ]]] [[[ 0.13 -1. 1. 0.44000006 0.06999993 0.75 -0.01510962]]] [[[-1. 1. 0.44000006 0.06999993 0.75 -0.01510962 -0.3473066 ]]] [[[ 1. 0.44000006 0.06999993 0.75 -0.01510962 -0.3473066 0.4949292 ]]] [[[ 0.44000006 0.06999993 0.75 -0.01510962 -0.3473066 0.4949292 0.2773262 ]]] [[[ 0.06999993 0.75 -0.01510962 -0.3473066 0.4949292 0.2773262 0.01309414]]] [[[ 0.75 -0.01510962 -0.3473066 0.4949292 0.2773262 0.01309414 0.32155222]]]
# Copy the scaled predictions and convert them back to original units with
# the scaler.
adimen = list(results)
print(adimen)
inverted = scaler.inverse_transform(adimen)
inverted
[array([-0.01510962], dtype=float32), array([-0.3473066], dtype=float32), array([0.4949292], dtype=float32), array([0.2773262], dtype=float32), array([0.01309414], dtype=float32), array([0.32155222], dtype=float32), array([-0.00763017], dtype=float32)]
array([[174.48904094], [141.26934129], [225.49292353], [203.73262324], [177.30941712], [208.1552254 ], [175.23698644]])
# Wrap the de-normalized forecast in a one-column frame named 'pronostico',
# plot it and save it to 'pronostico.csv'.
prediccion1SemanaDiciembre = pd.DataFrame(inverted)
prediccion1SemanaDiciembre.columns = ['pronostico']
prediccion1SemanaDiciembre.plot()
prediccion1SemanaDiciembre.to_csv('pronostico.csv')
prediccion1SemanaDiciembre
pronostico | |
---|---|
0 | 174.489041 |
1 | 141.269341 |
2 | 225.492924 |
3 | 203.732623 |
4 | 177.309417 |
5 | 208.155225 |
6 | 175.236986 |
# Append the forecast to the recent history under consecutive dates starting
# on 2018-12-01. Real timestamps are used instead of strings built as
# '2018-12-0' + str(i): that pattern produces invalid dates from the tenth
# day on and inserts string labels into a datetime index.
fechas_pronostico = pd.date_range(
    '2018-12-01', periods=len(prediccion1SemanaDiciembre), freq='D')
# Work on an independent float copy so the assignments neither write through
# a slice of `df` nor store float forecasts in an integer series.
ultimosDias = ultimosDias.astype('float64')
for fecha, fila in zip(fechas_pronostico, prediccion1SemanaDiciembre.pronostico):
    ultimosDias.loc[fecha] = fila
    print(fila)
ultimosDias.tail(14)
174.48904094075502 141.26934128856678 225.49292353152313 203.73262324331603 177.30941712430308 208.15522539962586 175.23698644310952
fecha 2018-11-24 00:00:00 189.000000 2018-11-25 00:00:00 76.000000 2018-11-26 00:00:00 276.000000 2018-11-27 00:00:00 220.000000 2018-11-28 00:00:00 183.000000 2018-11-29 00:00:00 251.000000 2018-11-30 00:00:00 189.000000 2018-12-01 00:00:00 174.489041 2018-12-02 00:00:00 141.269341 2018-12-03 00:00:00 225.492924 2018-12-04 00:00:00 203.732623 2018-12-05 00:00:00 177.309417 2018-12-06 00:00:00 208.155225 2018-12-07 00:00:00 175.236986 Name: unidades, dtype: float64
El artículo completo en www.aprendemachinelearning.com