#!/usr/bin/env python
# coding: utf-8

# # Time-Series Forecasting with a Neural Network - Embeddings
# Full article with the detailed explanation on the blog:
# http://www.aprendemachinelearning.com/
#
# ## Uses Keras and Tensorflow

# Import the libraries we are going to use

import pandas as pd
import numpy as np
import matplotlib.pylab as plt
get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('fast')

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Input, Embedding, Dense, Flatten, Dropout, concatenate, LSTM
from keras.layers import BatchNormalization, SpatialDropout1D
from keras.callbacks import Callback
from keras.models import Model
from keras.optimizers import Adam

# ### Load our dataset
df = pd.read_csv('time_series.csv', parse_dates=[0], header=None, index_col=0,
                 names=['fecha', 'unidades'])
df.head()

# Calendar features derived from the DatetimeIndex.
df['weekday'] = [x.weekday() for x in df.index]
df['month'] = [x.month for x in df.index]
df.head()

df.describe()

# # Data preprocessing

PASOS = 7  # window size: number of past days used to predict the next one


def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning dataset.

    Parameters
    ----------
    data : list or 2-D array/DataFrame of observations.
    n_in : number of lag observations (t-n_in ... t-1) used as input.
    n_out : number of future observations (t ... t+n_out-1) used as output.
    dropnan : whether to drop the rows with NaN produced by the shifting.

    Returns
    -------
    DataFrame with columns named 'var%d(t-%d)' / 'var%d(t)' / 'var%d(t+%d)'.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg


# load dataset values and ensure all data is float
values = df['unidades'].values.astype('float32')
# normalize features; reshape because there is a single feature column
scaler = MinMaxScaler(feature_range=(-1, 1))
values = values.reshape(-1, 1)
scaled = scaler.fit_transform(values)
reframed = series_to_supervised(scaled, PASOS, 1)
reframed.reset_index(inplace=True, drop=True)

# Attach the calendar features of the day being predicted.
# NOTE(review): the original offset i+8 (window of 7 + 1) is kept as-is;
# confirm it lines up with the intended target day.
contador = 0
reframed['weekday'] = df['weekday']
reframed['month'] = df['month']
for i in range(reframed.index[0], reframed.index[-1]):
    # FIX: single .loc assignment instead of chained indexing
    # (reframed['weekday'].loc[contador] = ...), which triggers
    # SettingWithCopyWarning and may silently fail to write through.
    reframed.loc[contador, 'weekday'] = df['weekday'][i + 8]
    reframed.loc[contador, 'month'] = df['month'][i + 8]
    contador = contador + 1
reframed.head()

reordenado = reframed[['weekday', 'month', 'var1(t-7)', 'var1(t-6)', 'var1(t-5)',
                       'var1(t-4)', 'var1(t-3)', 'var1(t-2)', 'var1(t-1)', 'var1(t)']]
# FIX: reassign instead of dropna(inplace=True) on a column-selection view,
# which raises SettingWithCopyWarning.
reordenado = reordenado.dropna()
reordenado

# ## Split into training and validation sets
training_data = reordenado.drop('var1(t)', axis=1)
target_data = reordenado['var1(t)']
# Hold the last 30 of the first 595 rows out for validation.
valid_data = training_data[595 - 30:595]
valid_target = target_data[595 - 30:595]
# FIX: the original kept rows 565-595 in BOTH the training and the validation
# set (training_data[0:595]), so validation measured memorization. Exclude
# the held-out rows from training.
training_data = training_data[0:595 - 30]
target_data = target_data[0:595 - 30]
print(training_data.shape, target_data.shape, valid_data.shape, valid_target.shape)

# # Build the neural-network model
# ## Feed-forward network with embeddings
# ### Inputs: 9 columns (2 embedded calendar features and 7 lag steps)


def crear_modeloEmbeddings():
    """Build the embeddings + feed-forward Keras model.

    Inputs: weekday (embedded, 8 ids), month (embedded, 13 ids) and the
    PASOS lagged, scaled sales values. Output: one tanh unit (the scaled
    prediction for day t).
    """
    dim_emb_dias = 2   # embedding depth for weekday (renamed: the original
    dim_emb_meses = 4  # reused emb_dias/emb_meses for both dim and tensor)
    in_dias = Input(shape=[1], name='dias')
    emb_dias = Embedding(7 + 1, dim_emb_dias)(in_dias)
    in_meses = Input(shape=[1], name='meses')
    emb_meses = Embedding(12 + 1, dim_emb_meses)(in_meses)
    in_cli = Input(shape=[PASOS], name='cli')
    # FIX: the original concatenated only the two embeddings, so the 'cli'
    # input (the 7 lagged values) was declared and fed but never connected
    # to the graph -- the model ignored the time series itself. Flatten each
    # (batch, 1, dim) embedding and concatenate the lag window as well.
    fe = concatenate([Flatten()(emb_dias), Flatten()(emb_meses), in_cli])
    x = Dense(PASOS, activation='tanh')(fe)
    outp = Dense(1, activation='tanh')(x)
    model = Model(inputs=[in_dias, in_meses, in_cli], outputs=outp)
    # FIX: use the canonical metric name so the recorded history key matches
    # history.history['mean_squared_error'] plotted below (with 'MSE' the
    # key differs across Keras versions).
    model.compile(loss='mean_absolute_error', optimizer='adam',
                  metrics=['mean_squared_error'])
    model.summary()
    return model


# ## Train our machine
EPOCHS = 40
model = crear_modeloEmbeddings()
# The 7 continuous lag columns fed into the 'cli' input.
lag_cols = ['var1(t-7)', 'var1(t-6)', 'var1(t-5)', 'var1(t-4)',
            'var1(t-3)', 'var1(t-2)', 'var1(t-1)']
continuas = training_data[lag_cols]
valid_continuas = valid_data[lag_cols]
history = model.fit([training_data['weekday'], training_data['month'], continuas],
                    target_data, epochs=EPOCHS,
                    validation_data=([valid_data['weekday'], valid_data['month'],
                                      valid_continuas], valid_target))

# ## Visualize results
results = model.predict([valid_data['weekday'], valid_data['month'], valid_continuas])
print(len(results))
plt.scatter(range(len(valid_target)), valid_target, c='g')
plt.scatter(range(len(results)), results, c='r')
plt.title('validate')
plt.show()

plt.plot(history.history['loss'])
plt.title('loss')
plt.plot(history.history['val_loss'])
plt.title('validate loss')
plt.show()

plt.title('Accuracy')
plt.plot(history.history['mean_squared_error'])
plt.show()

# Compare real vs predicted values back in the original units.
compara = pd.DataFrame(np.array([valid_target, [x[0] for x in results]])).transpose()
compara.columns = ['real', 'prediccion']
# NOTE(review): the scaler was fitted on 1 column; inverse-transforming a
# 2-column array relies on NumPy broadcasting and on older scikit-learn not
# validating n_features -- verify against the installed sklearn version.
inverted = scaler.inverse_transform(compara.values)
compara2 = pd.DataFrame(inverted)
compara2.columns = ['real', 'prediccion']
compara2['diferencia'] = compara2['real'] - compara2['prediccion']
compara2.head(12)

compara2.describe()

compara2['real'].plot()
compara2['prediccion'].plot()

# # Forecast
# Starting from the last week of November 2018, we will try to predict the
# first week of December.
# Last two weeks of November 2018: the 7-day window that seeds the forecast.
ultimosDias = df['2018-11-16':'2018-11-30']
ultimosDias

# ## Prepare the test data
# load dataset values and ensure all data is float
values = ultimosDias['unidades'].values.astype('float32')
values = values.reshape(-1, 1)  # single feature column
# FIX: use transform(), not fit_transform(). Re-fitting the scaler on the
# last 15 days rescales them with a different min/max than the network was
# trained with, corrupting both the inputs and the later inverse_transform
# of the predictions. (The commented-out "scaler = MinMaxScaler(...)" in the
# original showed the intent to reuse the training scaler.)
scaled = scaler.transform(values)
reframed = series_to_supervised(scaled, PASOS, 1)
reframed.reset_index(inplace=True, drop=True)

# Attach the calendar features of the day being predicted.
# NOTE(review): the i+8 offset mirrors the training-preprocessing loop;
# confirm it lines up with the intended target day.
contador = 0
reframed['weekday'] = ultimosDias['weekday']
reframed['month'] = ultimosDias['month']
for i in range(reframed.index[0], reframed.index[-1]):
    # FIX: single .loc assignment instead of chained indexing, which
    # triggers SettingWithCopyWarning and may silently fail to write through.
    reframed.loc[contador, 'weekday'] = ultimosDias['weekday'][i + 8]
    reframed.loc[contador, 'month'] = ultimosDias['month'][i + 8]
    contador = contador + 1
reframed.head()

reordenado = reframed[['weekday', 'month', 'var1(t-7)', 'var1(t-6)', 'var1(t-5)',
                       'var1(t-4)', 'var1(t-3)', 'var1(t-2)', 'var1(t-1)']]
# FIX: reassign instead of dropna(inplace=True) on a column-selection view.
reordenado = reordenado.dropna()
reordenado

values = reordenado.values
# Keep only the final window row(s); reshape to (rows, 1, 9) for the
# sliding-window update below.
x_test = values[5:, :]
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))
print(x_test.shape)
print(x_test)
ultDiaSemana = reordenado.weekday[len(reordenado.index) - 1]
ultDiaSemana


def agregarNuevoValor(x_test, nuevoValor, ultDiaSemana):
    """Slide the input window one day forward.

    Shifts the 7 lagged values one slot to the left, appends *nuevoValor*
    as the most recent lag, advances the weekday (wrapping 6 -> 0) and pins
    the month feature to December (12).

    Returns the updated (x_test, ultDiaSemana) pair. Mutates x_test in place.
    """
    # shift lag columns [2..8] one position to the left
    for i in range(x_test.shape[2] - 3):
        x_test[0][0][i + 2] = x_test[0][0][i + 3]
    ultDiaSemana = ultDiaSemana + 1
    if ultDiaSemana > 6:
        ultDiaSemana = 0
    x_test[0][0][0] = ultDiaSemana
    x_test[0][0][1] = 12  # forecasting December
    x_test[0][0][x_test.shape[2] - 1] = nuevoValor
    return x_test, ultDiaSemana


# ## Forecast for the "next week"
results = []
for i in range(7):
    dia = np.array([x_test[0][0][0]])
    mes = np.array([x_test[0][0][1]])
    valores = np.array([x_test[0][0][2:9]])
    parcial = model.predict([dia, mes, valores])
    results.append(parcial[0])
    print('pred', i, x_test)
    # Feed the prediction back in as the newest lag (autoregressive rollout).
    x_test, ultDiaSemana = agregarNuevoValor(x_test, parcial[0], ultDiaSemana)

# ## Convert the results back to the original scale
adimen = [x for x in results]
print(adimen)
inverted = scaler.inverse_transform(adimen)
inverted

# ## Visualize the forecast
prediccion1SemanaDiciembre = pd.DataFrame(inverted)
prediccion1SemanaDiciembre.columns = ['pronostico']
prediccion1SemanaDiciembre.plot()
prediccion1SemanaDiciembre.to_csv('pronostico_embeddings.csv')

prediccion1SemanaDiciembre

# Full article at www.aprendemachinelearning.com