:ext QuasiQuotes

import qualified H.Prelude as H

-- Start the embedded R session used by the quasiquotes below.
H.initialize H.defaultConfig

-- Attach the R packages that the R snippets in this notebook call.
[r|
  library(keras)
  library(dplyr)
  library(ggplot2)
  library(lubridate)
  library(tidyr)
  library(zoo)
  library(forecast)
  library(xts)
|]

-- Parse text holding one number per line into a list of Doubles.
-- 'read' is partial: any line that is not a valid Double literal (a blank
-- line included) raises a runtime error once the value is forced.
getAsDouble :: String -> [Double]
getAsDouble = map read . lines

-- Load the three numeric series and the date strings, one entry per line.
googl <- fmap getAsDouble (readFile "googl.csv")
intl <- fmap getAsDouble (readFile "intl.csv")
nvda <- fmap getAsDouble (readFile "nvda.csv")
dates <- fmap lines (readFile "dates.csv")

-- Build a data frame from the Haskell values (spliced in through the `_hs`
-- suffix), convert it to a zoo series and plot it. `tss` is assigned with
-- `<<-`; the later snippets read it.
[rgraph|
  df <- data.frame(tstamp = ymd(dates_hs),
                   googl = googl_hs,
                   intl = intl_hs,
                   nvda = nvda_hs)
  tss <<- read.zoo(df)
  autoplot(tss) + facet_free()
|]

-- Autocorrelation plot of the series; NA values are passed through.
[rgraph|
  acf(tss, na.action = na.pass)
|]

-- Fit an automatically selected ARIMA model to the second column of `tss`
-- and plot a forecast with horizon 7.
[rgraph|
  intl_ts <- as.xts(tss[,2])
  fit <- auto.arima(intl_ts)
  fc <- forecast(fit, h=7)
  plot(fc)
|]

-- Print the Python configuration that reticulate reports.
[rprint|
  reticulate::py_config()
|]

-- Prepare the LSTM training data from the second column of `tss`:
-- difference, scale with the min/max of the differences, then cut into
-- sliding windows of `lstm_num_timesteps` values with the following value as
-- the target. Names assigned with `<<-` are read again by later snippets.
[rprint|
  lstm_num_timesteps <<- 7
  intl <<- unclass(tss[,2])

  # difference
  intl_start <- intl[1]
  intl_diff <- diff(intl)

  # normalize
  minval <<- min(intl_diff)
  maxval <<- max(intl_diff)
  normalize <- function(vec, min, max) {
    (vec-min) / (max-min)
  }
  denormalize <<- function(vec,min,max) {
    vec * (max - min) + min
  }
  intl_diff <- normalize(intl_diff, minval, maxval)

  # create timesteps
  # Fail loudly when the series is too short to yield a single window; a
  # plain 1:n range would run backwards and index out of bounds instead.
  stopifnot(length(intl_diff) > lstm_num_timesteps)
  window_starts <- seq_len(length(intl_diff) - lstm_num_timesteps)
  # One row per window; vapply fixes every window to lstm_num_timesteps values.
  X_train <<- t(vapply(window_starts,
                       function(x) intl_diff[x:(x + lstm_num_timesteps - 1)],
                       numeric(lstm_num_timesteps)))
  # The target of each window is the value right after it.
  y_train <<- intl_diff[(lstm_num_timesteps + 1):length(intl_diff)]

  # Keras LSTMs expect the input array to be shaped as
  # (no. samples, no. time steps, no. features)
  dim(X_train) <<- c(dim(X_train)[1], dim(X_train)[2], 1)
  num_samples <- dim(X_train)[1]
  num_steps <<- dim(X_train)[2]
  num_features <<- dim(X_train)[3]
  c(num_samples, num_steps, num_features)
|]

-- at this point, there is no model yet
-- NOTE(review): no snippet above assigns `model`, so this print presumably
-- reports an error; confirm that this is the intended demonstration.
[rprint|
  model
|]

-- Training settings and an empty sequential model.
[rprint|
  batch_size <<- 1
  epochs <<- 20
  lstm_units <<- 4
  model <<- keras_model_sequential()
|]

-- Add one LSTM layer and a single-unit dense layer, compile with mean
-- squared error loss and the adam optimizer, then print the summary.
[rprint|
  model %>%
    layer_lstm(units = lstm_units, input_shape = c(num_steps, num_features)) %>%
    layer_dense(units = 1) %>%
    compile(
      loss = 'mean_squared_error',
      optimizer = 'adam'
    )
  model %>% summary()
|]

-- Train the model on the windowed data.
[rprint|
  model %>% fit(X_train, y_train, batch_size = batch_size, epochs = epochs)
  # model %>% save_model_hdf5(filepath = paste0(model_name, ".h5"))
|]

-- Predict on the training windows, undo the scaling and the differencing,
-- and plot the predictions against the original series.
[rgraph|
  pred_train <- model %>% predict(X_train, batch_size = 1)
  pred_train <- denormalize(pred_train, minval, maxval)
  # Undo the differencing: add each predicted change to the preceding value.
  pred_train_undiff <- pred_train + intl[(lstm_num_timesteps+1):(length(intl)-1)]
  # The first lstm_num_timesteps + 1 points have no prediction; after padding
  # them with NA the prediction column must be as long as the series.
  stopifnot(length(pred_train_undiff) == length(intl) - lstm_num_timesteps - 1)
  # time_id follows the series length instead of a hard-coded 1:113.
  df <- data_frame(time_id = seq_along(intl),
                   train = intl,
                   pred_train = c(rep(NA, lstm_num_timesteps+1), pred_train_undiff))
  df <- df %>% gather(key = 'type', value = 'value', train:pred_train)
  ggplot(df, aes(x = time_id, y = value)) +
    geom_line(aes(color = type)) +
    theme(aspect.ratio=0.8)
|]