# data handling
import numpy as np
import pandas as pd
# plotting
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
# create animations of the training evolution
from celluloid import Camera
from IPython.display import HTML
# deep learning (Keras functional API)
import tensorflow
from tensorflow.keras.layers import Dense, Input, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# make it reproducible: seed both NumPy (data generation) and TensorFlow (weight init)
seed = 12345
np.random.seed(seed)
tensorflow.random.set_seed(seed)
# make plots bigger
plt.rcParams['figure.figsize'] = [12, 8]
# render matplotlib animations as HTML5 video in the notebook
plt.rcParams['animation.html'] = 'html5'
# plot style: white background, hide major tick marks
sns.set_style(style='white', rc={"xtick.major.size": 0, "ytick.major.size": 0})
For this specific problem a synthetic dataset has been created. It contains a total of 400 samples split across 2 classes (200 samples of each class). Each sample has 2 variables, X0 and X1, so it can be plotted in a 2D figure.
# base number of samples per class
size = 200
# draw three Gaussian blobs (std 0.5): two outer blobs of class 0 centered at
# -1 and +1, flanking a central class-1 blob centered at 0 — the same draw
# order and sizes as sampling each blob individually
blob_specs = [(-1, 0, size // 2), (0, 1, size), (1, 0, size // 2)]
feature_chunks = [np.random.normal(center, 0.5, (count, 2)) for center, _, count in blob_specs]
label_chunks = [np.repeat(label, count) for _, label, count in blob_specs]
# assemble a single dataframe with coordinates X0, X1 and class label y
df_train = pd.DataFrame(np.concatenate(feature_chunks), columns=['X0', 'X1'])
df_train['y'] = np.concatenate(label_chunks)
# show dataset: scatter the two classes with a diverging (blue/red) palette
ax = sns.scatterplot(x='X0', y='X1', data=df_train, hue='y', palette=sns.diverging_palette(220, 20, n=2))
# transparent figure background (nicer for embedding the figure elsewhere)
ax.figure.patch.set_alpha(0)
# convert to numpy arrays for Keras: x_train is (400, 2); y_train is (400, 1)
# because the double-bracket selection keeps a DataFrame (2-D) shape
x_train, y_train = df_train[['X0', 'X1']].values, df_train[['y']].values
To show the training evolution, a very simple network has been created. It contains 1 hidden layer with 2 neurons and an output layer with 1 neuron. The output layer uses a sigmoid activation function, squashing the output to a value in the range [0, 1], which represents the predicted probability that a sample belongs to class 1 (versus class 0).
def gen_model(activation='tanh', learning_rate=1e-1):
    '''
    Generates and compiles the 2-2-1 binary classification model.

    Parameters
    ----------
    activation : str
        Activation function for the hidden layer (default 'tanh').
    learning_rate : float
        Learning rate for the Adam optimizer (default 1e-1).

    Returns
    -------
    tensorflow.keras.Model
        Compiled model mapping (N, 2) inputs to (N, 1) sigmoid outputs,
        trained with binary cross-entropy and tracking accuracy.

    Notes
    -----
    Reads the module-level ``x_train`` to size the input layer.
    '''
    # define model structure; shape is the per-sample shape (2 features),
    # passed as a tuple as the Keras Input API documents
    x_in = Input(shape=(x_train.shape[1],), name='input')
    # FC 2 units
    x = Dense(units=2, name='hidden_1')(x_in)
    # hidden activation (tanh by default)
    x = Activation(activation, name='activation_1')(x)
    # FC 1 unit
    x = Dense(units=1, name='output')(x)
    # sigmoid squashes the logit to a probability in [0, 1]
    x_out = Activation('sigmoid', name='activation_output')(x)
    # generate the model
    model = Model(x_in, x_out)
    # show information about the model
    model.summary()
    # parameters — 'learning_rate' is the supported kwarg; the old 'lr'
    # alias is deprecated and removed in newer TF/Keras releases
    optimizer = Adam(learning_rate=learning_rate)
    metrics = ["accuracy"]
    loss = "binary_crossentropy"
    # compile the model
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    # return the model
    return model
# set the value of the activation and learning rate
activation = 'tanh'
learning_rate = 1e-1
# generate the model
model = gen_model(activation, learning_rate)
# figure holding the evolution: one panel each for the input space,
# the hidden layer's pre-activation output, and its activation output
f, axes = plt.subplots(1, 3, figsize=(18, 6), gridspec_kw={'height_ratios':[.9]})
f.subplots_adjust(top=0.82)
# camera records one animation frame per snap() call
camera = Camera(f)
# number of epochs
epochs = 20
# evaluate/plot first, then train: each frame shows the state BEFORE that
# epoch's weight update
for i in range(epochs):
    # evaluate the model; returns [loss, accuracy] per the compile() metrics
    evaluation = model.evaluate(x_train, y_train, verbose=0)
    # intermediate models exposing the hidden layer's pre-activation
    # ("hidden_1") and post-activation ("activation_1") outputs
    model_hid_1 = Model(model.input, model.get_layer("hidden_1").output)
    model_act_1 = Model(model.input, model.get_layer("activation_1").output)
    # project the training data through each intermediate model (both are
    # 2-unit layers, so the outputs plot as 2-D points)
    df_hid_1 = pd.DataFrame(model_hid_1.predict(x_train), columns=['X0', 'X1'])
    df_hid_1['y'] = y_train
    df_act_1 = pd.DataFrame(model_act_1.predict(x_train), columns=['X0', 'X1'])
    df_act_1['y'] = y_train
    # generate meshgrid (200x200 values) spanning the training data range
    x = np.linspace(x_train[:,0].min(), x_train[:,0].max(), 200)
    y = np.linspace(x_train[:,1].min(), x_train[:,1].max(), 200)
    xv, yv = np.meshgrid(x, y)
    # meshgrid intensity: predicted probability at every grid point.
    # NOTE: the 'y' column is added AFTER the first predict, so
    # df_mg_train.values holds only X0, X1 at that point; the later
    # predict calls slice the 'y' column back off with [:,:-1]
    df_mg_train = pd.DataFrame(np.stack((xv.flatten(), yv.flatten()), axis=1), columns=['X0', 'X1'])
    df_mg_train['y'] = model.predict(df_mg_train.values)
    df_mg_hid_1 = pd.DataFrame(model_hid_1.predict(df_mg_train.values[:,:-1]), columns=['X0', 'X1'])
    df_mg_hid_1['y'] = model.predict(df_mg_train.values[:,:-1])
    df_mg_act_1 = pd.DataFrame(model_act_1.predict(df_mg_train.values[:,:-1]), columns=['X0', 'X1'])
    df_mg_act_1['y'] = model.predict(df_mg_train.values[:,:-1])
    # panel 0: decision-surface intensity (low alpha) under the raw data points
    ax = sns.scatterplot(x='X0', y='X1', data=df_mg_train, hue='y', x_jitter=True, y_jitter=True, legend=None, ax=axes[0], palette=sns.diverging_palette(220, 20, as_cmap=True), alpha=0.15)
    ax = sns.scatterplot(x='X0', y='X1', data=df_train, hue='y', legend=None, ax=axes[0], palette=sns.diverging_palette(220, 20, n=2))
    ax.set_title('Input layer')
    # panel 1: same data as seen by the hidden layer (pre-activation)
    ax = sns.scatterplot(x='X0', y='X1', data=df_mg_hid_1, hue='y', x_jitter=True, y_jitter=True, legend=None, ax=axes[1], palette=sns.diverging_palette(220, 20, as_cmap=True), alpha=0.15)
    ax = sns.scatterplot(x='X0', y='X1', data=df_hid_1, hue='y', legend=None, ax=axes[1], palette=sns.diverging_palette(220, 20, n=2))
    ax.set_title('Hidden layer')
    # show the current epoch and the metrics above the middle panel
    # (evaluation[1] is accuracy, evaluation[0] is loss)
    ax.text(x=0.5, y=1.15, s='Epoch {}'.format(i+1), fontsize=16, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
    ax.text(x=0.5, y=1.08, s='Accuracy {:.3f} - Loss {:.3f}'.format(evaluation[1], evaluation[0]), fontsize=13, ha='center', va='bottom', transform=ax.transAxes)
    # panel 2: same data after the hidden layer's activation function
    ax = sns.scatterplot(x='X0', y='X1', data=df_mg_act_1, hue='y', x_jitter=True, y_jitter=True, legend=None, ax=axes[2], palette=sns.diverging_palette(220, 20, as_cmap=True), alpha=0.15)
    ax = sns.scatterplot(x='X0', y='X1', data=df_act_1, hue='y', legend=None, ax=axes[2], palette=sns.diverging_palette(220, 20, n=2))
    ax.set_title('Activation')
    # show the plot
    plt.show()
    # snapshot the current figure state as one animation frame
    camera.snap()
    # stop execution early once loss <= 0.263 (avoid running all epochs if not needed)
    if evaluation[0] <= 0.263:
        break
    # train the model 1 epoch
    model.fit(x_train, y_train, epochs=1, verbose=0)
# stitch the recorded frames into a matplotlib animation
anim = camera.animate()
# render the animation as an HTML5 video in the notebook
HTML(anim.to_html5_video())