-- Enable the QuasiQuotes extension required by the [r| ... |], [rprint| ... |]
-- and [rgraph| ... |] blocks used throughout this file.
:ext QuasiQuotes
import qualified H.Prelude as H
-- Initialise the embedded R session with the default configuration before any
-- R quasi-quote is evaluated.
H.initialize H.defaultConfig

-- Load the R packages used by the R blocks in this file:
--   keras     - keras_model_sequential, layer_lstm, layer_dense, compile, fit
--   dplyr     - data_frame
--   ggplot2   - ggplot, geom_line, theme
--   lubridate - ymd
--   tidyr     - gather
--   zoo       - read.zoo
--   forecast  - auto.arima, forecast
--   xts       - as.xts
-- The load order is left as is because it determines which package masks which.
[r|
  library(keras)
  library(dplyr)
  library(ggplot2)
  library(lubridate)
  library(tidyr)
  library(zoo)
  library(forecast)
  library(xts) |] 

-- Parse text holding one numeric value per line into a list of Doubles.
-- Each line is converted with 'read', so a line that is not a valid number
-- raises an error when its value is demanded.
getAsDouble :: String -> [Double]
getAsDouble contents = [read entry | entry <- lines contents]
-- Read the three numeric series (one value per line) and the matching date
-- strings; these bindings are spliced into the R blocks further down.
googl <- getAsDouble <$> readFile "googl.csv"
intl <- getAsDouble <$> readFile "intl.csv"
nvda <- getAsDouble <$> readFile "nvda.csv"
dates <- lines <$> readFile "dates.csv"

[rgraph|
  # Assemble the spliced-in series into one data frame, parsing the date
  # strings with ymd()
  prices <- data.frame(
    tstamp = ymd(dates_hs),
    googl = googl_hs,
    intl = intl_hs,
    nvda = nvda_hs
  )

  # Assigned with <<- because later blocks read tss
  tss <<- read.zoo(prices)

  # One panel per series, each with its own scale
  autoplot(tss) + facet_free() |]

-- Plot the autocorrelation function of the series in tss; na.pass lets missing
-- values through instead of rejecting the input.
[rgraph|
  acf(tss, na.action = na.pass) |]

[rgraph|
  # Second column of tss, converted to xts for model fitting
  intl_ts <- as.xts(tss[, 2])

  # Let auto.arima() choose the model, then forecast 7 steps ahead
  arima_fit <- auto.arima(intl_ts)
  arima_fc <- forecast(arima_fit, h = 7)

  plot(arima_fc) |]

-- Print the Python configuration reported by reticulate::py_config().
[rprint| reticulate::py_config() |]  

[rprint|
  # Number of consecutive (differenced, normalised) values per training sample
  lstm_num_timesteps <<- 7

  # Second column of tss with the zoo class removed; assigned with <<- because
  # the prediction block reads it again
  intl <<- unclass(tss[, 2])

  # First-order differencing
  intl_diff <- diff(intl)

  # Min-max normalisation of the differences. minval / maxval and
  # denormalize() are assigned with <<- so predictions can be mapped back later.
  minval <<- min(intl_diff)
  maxval <<- max(intl_diff)
  # A zero (or inverted, for empty input) range would divide by zero below.
  # isTRUE() keeps the previous behaviour when the range is NA.
  if (isTRUE(maxval <= minval)) {
    stop("cannot normalise: differenced series is empty or constant",
         call. = FALSE)
  }
  normalize <- function(vec, min, max) {
    (vec - min) / (max - min)
  }
  denormalize <<- function(vec, min, max) {
    vec * (max - min) + min
  }
  intl_diff <- normalize(intl_diff, minval, maxval)

  # Sliding windows: sample i holds intl_diff[i .. i + timesteps - 1] and its
  # target is the value that follows the window
  num_windows <- length(intl_diff) - lstm_num_timesteps
  if (num_windows < 1) {
    stop("series too short: need more than lstm_num_timesteps differenced values",
         call. = FALSE)
  }
  windows <- vapply(
    seq_len(num_windows),
    function(i) intl_diff[i:(i + lstm_num_timesteps - 1)],
    numeric(lstm_num_timesteps)
  )
  y_train <<- intl_diff[(lstm_num_timesteps + 1):length(intl_diff)]

  # Keras LSTMs expect the input array to be shaped as
  # (no. samples, no. time steps, no. features). Building the matrix by row
  # from the window data also gives the right shape when the window length is 1.
  X_train <<- array(
    matrix(windows, ncol = lstm_num_timesteps, byrow = TRUE),
    dim = c(num_windows, lstm_num_timesteps, 1)
  )
  num_samples <- dim(X_train)[1]
  num_steps <<- dim(X_train)[2]
  num_features <<- dim(X_train)[3]
  c(num_samples, num_steps, num_features) |]
  

-- At this point no `model` has been assigned in the R session yet (it is first
-- created by keras_model_sequential() further down), so this lookup is
-- expected to fail.
[rprint| model |] 

[rprint|
  # Training hyper-parameters, assigned with <<- so the following blocks can
  # read them
  lstm_units <<- 4
  epochs <<- 20
  batch_size <<- 1

  # Empty sequential model; its layers are added in the next block
  model <<- keras_model_sequential()
  |]

[rprint|
  # Keras models are modified in place, so each call below updates `model`
  # directly without reassigning it.

  # One LSTM layer over (time steps, features) followed by a single-unit
  # dense output layer
  layer_lstm(
    model,
    units = lstm_units,
    input_shape = c(num_steps, num_features)
  )
  layer_dense(model, units = 1)

  # Mean squared error loss, Adam optimiser
  compile(model, loss = 'mean_squared_error', optimizer = 'adam')

  summary(model)
  |]

[rprint|
  # Train on the windowed series with the hyper-parameters set earlier
  fit(
    model,
    X_train,
    y_train,
    batch_size = batch_size,
    epochs = epochs
  )
  # model %>% save_model_hdf5(filepath = paste0(model_name, ".h5"))
  |]

  [rgraph|
   # Predict on the training windows and map the output back from the
   # normalised scale to differences
   pred_train <- model %>% predict(X_train, batch_size = 1)
   pred_train <- denormalize(pred_train, minval, maxval)

   # There must be exactly one prediction per window; otherwise the addition
   # below would silently recycle values
   stopifnot(length(pred_train) == length(intl) - lstm_num_timesteps - 1)

   # Undo the differencing: each predicted difference is added to the observed
   # value that precedes the predicted point
   pred_train_undiff <- as.numeric(pred_train) +
     as.numeric(intl[(lstm_num_timesteps + 1):(length(intl) - 1)])

   # The first lstm_num_timesteps + 1 points have no prediction, so they are
   # padded with NA. time_id follows the actual series length rather than a
   # fixed count.
   df <- tibble(
     time_id = seq_along(intl),
     train = as.numeric(intl),
     pred_train = c(rep(NA, lstm_num_timesteps + 1), pred_train_undiff)
   )
   df <- df %>% gather(key = 'type', value = 'value', train:pred_train)
   ggplot(df, aes(x = time_id, y = value)) +
     geom_line(aes(color = type)) +
     theme(aspect.ratio = 0.8)
   |]