# We are making use of the GPU here, so remember to enable it on Colab by:
# Runtime >> Change runtime type >> Hardware accelerator (before starting the VM).
!pip install -q tensorflow-gpu==2.0.0

# Download the dataset
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00279/SUSY.csv.gz

# Unzip the .gz file
!gzip -d SUSY.csv.gz
!ls

# Load with pandas
import pandas as pd
susy = pd.read_csv('SUSY.csv', header=None)

# Inspect the first five rows:
# Column 0 is the desired target.
# Columns 1-18 are the input features for the task (both low-level and high-level, see the reference paper).
susy.head()

# For this run, we only use 100k examples for training
import numpy as np
X = susy.values[0:100000, 1:].astype(np.float32)  # Note the casting to float32
y = susy.values[0:100000, 0:1]

# Check that the classes are balanced
import matplotlib.pyplot as plt
plt.hist(y, bins=2, rwidth=0.98, ec='black')

import tensorflow as tf

# Learn more about tf.data here:
# https://www.tensorflow.org/guide/data
# In this case, from_tensor_slices allows us to iterate over batches of (X, y) simultaneously.
train_susy = tf.data.Dataset.from_tensor_slices((X, y))

for xb, yb in train_susy.batch(4):
    print(xb.shape)
    print(yb)
    break

# Or, to get values programmatically:
xb, yb = next(iter(train_susy.batch(4)))

from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential

# A feedforward neural network with a single hidden layer
# TODO: experiment!
def build_model():
    net = Sequential()
    net.add(Dense(50, activation='sigmoid'))
    net.add(Dense(1, activation='sigmoid'))
    return net

net = build_model()

# The variables of the network are initialized on the first call,
# because we have not specified an input shape anywhere.
net(xb)
net.summary()

# The model is:
#   h = sigmoid(W * x + b)
#   y = sigmoid(v * h + c)
# net.variables is [W, b, v, c]
len(net.variables)

from tensorflow.keras import losses, optimizers
from tqdm import tqdm_notebook

# Like last lab session, no changes
loss = losses.BinaryCrossentropy()
opt = optimizers.Adam()

loss_history = []
for epoch in tqdm_notebook(range(3)):
    # The dataset is shuffled and batched at every epoch
    for xb, yb in train_susy.shuffle(10000).batch(32):
        with tf.GradientTape() as tape:
            ypred = net(xb)
            l = loss(yb, ypred)
        # We need the gradient with respect to all variables
        g = tape.gradient(l, net.variables)
        # 'zip' provides a list of (gradient, variable) pairs
        opt.apply_gradients(zip(g, net.variables))
        loss_history.append(l.numpy())

# Noisy!
plt.plot(loss_history)

# Pandas provides some utilities to smooth a time series
loss_pd = pd.Series(loss_history).ewm(halflife=50)
plt.plot(loss_pd.mean())
plt.plot(loss_history, 'b', alpha=0.2)

# We use a separate set of 100k values for testing
Xtest = susy.values[100000:200000, 1:].astype(np.float32)
ytest = susy.values[100000:200000, 0:1]
test_susy = tf.data.Dataset.from_tensor_slices((Xtest, ytest))

from tensorflow.keras import metrics

# Accumulate a running average of the accuracy over all batches
acc = metrics.BinaryAccuracy()
for xb, yb in test_susy.batch(32):
    acc(yb, net(xb))
acc.result()
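
# As an optional extra check (a minimal sketch along the same lines as the accuracy
# loop above), the test AUC of this hand-trained network can be accumulated in the
# same streaming fashion with tf.keras.metrics.AUC, which approximates the AUC from
# a fixed set of thresholds:
auc = metrics.AUC()
for xb, yb in test_susy.batch(32):
    auc(yb, net(xb))
auc.result()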
# We re-initialize the model
net = build_model()

# Compile provides all the custom blocks for training:
# - loss (BinaryCrossentropy)
# - optimizer (Adam)
# - metrics (BinaryAccuracy and AUC)
# We use AUC to compare with the original paper.
# Note that computing AUC from mini-batches is non-trivial:
# https://www.tensorflow.org/api_docs/python/tf/keras/metrics/AUC
net.compile(optimizer=opt, loss=loss,
            metrics=[metrics.BinaryAccuracy(), metrics.AUC()])

# Same as before, this time with tracking of metrics
history = net.fit(train_susy.shuffle(1000).batch(32), epochs=3)

# Plot the AUC across epochs. The exact key ('auc', 'auc_1', ...) depends on how many
# AUC metrics were created in this session; check history.history.keys() if unsure.
plt.plot(history.history['auc_1'])

# We remove the last dimension from y
test_susy = tf.data.Dataset.from_tensor_slices((Xtest, ytest.reshape(-1)))

# evaluate returns [loss, accuracy, auc], because this is how we compiled our model
net.evaluate(test_susy.batch(32))
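
# As an optional extra (a minimal sketch, assuming scikit-learn is available in the
# Colab runtime), the full ROC curve behind the AUC above can be drawn by collecting
# the predictions on the whole test set:
from sklearn.metrics import roc_curve

ypred_test = net.predict(test_susy.batch(32)).ravel()
fpr, tpr, _ = roc_curve(ytest.reshape(-1), ypred_test)
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')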