# We are making use of the GPU here, so remember to enable it on Colab by:
# Runtime >> Change runtime type >> Hardware accelerator (before starting the VM).
!pip install -q tensorflow-gpu==2.0.0

# Download the dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00279/SUSY.csv.gz

# Unzip the .gz file
!gzip -d SUSY.csv.gz
!ls

# Load with pandas
import pandas as pd
susy = pd.read_csv('SUSY.csv', header=None)

# Inspect the first five rows:
# Column 0 is the desired target.
# Columns 1-18 are the input features for the task (both low-level and high-level, see the reference paper).
susy.head()

# For this run, we only use 100k examples for training
import numpy as np
X = susy.values[0:100000, 1:].astype(np.float32)  # Note the casting to float32
y = susy.values[0:100000, 0:1]

# Check that the classes are balanced
import matplotlib.pyplot as plt
plt.hist(y, bins=2, rwidth=0.98, ec='black')

import tensorflow as tf

# Learn more about tf.data here:
# https://www.tensorflow.org/guide/data
# In this case, from_tensor_slices allows us to iterate over batches of (X, y) simultaneously.
train_susy = tf.data.Dataset.from_tensor_slices((X, y))

for xb, yb in train_susy.batch(4):
    print(xb.shape)
    print(yb)
    break

# Or, to get values programmatically:
xb, yb = next(iter(train_susy.batch(4)))

from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential

# A feedforward neural network with a single hidden layer
# TODO: experiment!
def build_model():
    net = Sequential()
    net.add(Dense(50, activation='sigmoid'))
    net.add(Dense(1, activation='sigmoid'))
    return net

net = build_model()

# The variables of the network are initialized on the first call,
# because we have not specified an input shape anywhere.
net(xb)
net.summary()

# The model is:
#   h = sigmoid(W * x + b)
#   y = sigmoid(v * h + c)
# net.variables is [W, b, v, c]
len(net.variables)

from tensorflow.keras import losses, optimizers
from tqdm import tqdm_notebook

# Like last lab session, no changes
loss = losses.BinaryCrossentropy()
opt = optimizers.Adam()

loss_history = []
for epoch in tqdm_notebook(range(3)):
    # The dataset is shuffled and batched at every epoch
    for xb, yb in train_susy.shuffle(10000).batch(32):
        with tf.GradientTape() as tape:
            ypred = net(xb)
            l = loss(yb, ypred)
        # We need the gradient with respect to all variables
        g = tape.gradient(l, net.variables)
        # 'zip' provides a list of (gradient, variable) pairs
        opt.apply_gradients(zip(g, net.variables))
        loss_history.append(l.numpy())

# Noisy!
plt.plot(loss_history)

# Pandas provides some utilities to smooth a time series
loss_pd = pd.Series(loss_history).ewm(halflife=50)
plt.plot(loss_pd.mean())
plt.plot(loss_history, 'b', alpha=0.2)

# We use a separate set of 100k values for testing
Xtest = susy.values[100000:200000, 1:].astype(np.float32)
ytest = susy.values[100000:200000, 0:1]
test_susy = tf.data.Dataset.from_tensor_slices((Xtest, ytest))

from tensorflow.keras import metrics

# Accumulate a running average of the accuracy over all batches
acc = metrics.BinaryAccuracy()
for xb, yb in test_susy.batch(32):
    acc(yb, net(xb))
acc.result()
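
# As an optional extra check (a minimal sketch along the same lines as the accuracy
# loop above), the test AUC of this hand-trained network can be accumulated in the
# same streaming fashion with tf.keras.metrics.AUC, which approximates the AUC from
# a fixed set of thresholds:
auc = metrics.AUC()
for xb, yb in test_susy.batch(32):
    auc(yb, net(xb))
auc.result()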
# We re-initialize the model
net = build_model()

# Compile provides all the custom blocks for training:
# - loss (BinaryCrossentropy)
# - optimizer (Adam)
# - metrics (BinaryAccuracy and AUC)
# We use AUC to compare with the original paper.
# Note that computing AUC from mini-batches is non-trivial:
# https://www.tensorflow.org/api_docs/python/tf/keras/metrics/AUC
net.compile(optimizer=opt, loss=loss,
            metrics=[metrics.BinaryAccuracy(), metrics.AUC()])

# Same as before, this time with tracking of metrics
history = net.fit(train_susy.shuffle(1000).batch(32), epochs=3)

# Plot the AUC across epochs. The exact key ('auc', 'auc_1', ...) depends on how many
# AUC metrics were created in this session; check history.history.keys() if unsure.
plt.plot(history.history['auc_1'])

# We remove the last dimension from y
test_susy = tf.data.Dataset.from_tensor_slices((Xtest, ytest.reshape(-1)))

# evaluate returns [loss, accuracy, auc], because this is how we compiled our model
net.evaluate(test_susy.batch(32))
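
# As an optional extra (a minimal sketch, assuming scikit-learn is available in the
# Colab runtime), the full ROC curve behind the AUC above can be drawn by collecting
# the predictions on the whole test set:
from sklearn.metrics import roc_curve

ypred_test = net.predict(test_susy.batch(32)).ravel()
fpr, tpr, _ = roc_curve(ytest.reshape(-1), ypred_test)
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')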