This is an implementation of a neural network from scratch, with no ML libraries: only NumPy is used for the network itself and Matplotlib for plotting the results.
Objective: to understand what different layers learn, how different activation functions affect the learning rate, and, importantly, what individual neurons learn under different activation functions.
The implementation includes the following (a minimal sketch of the Adam update follows the list):
Optimization: Gradient Descent, Momentum, RMSprop, Adam (RMSprop + Momentum)
Regularization: L2 penalization, Dropout
Activation functions: Sigmoid, Tanh, ReLU, Leaky ReLU, Softmax
Datasets: two-class (Gaussian, Linear, Moons, Spiral, Sinusoidal) and multiclass (Gaussian-distributed data, up to 9 classes)
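For reference, here is a minimal sketch of the Adam update (Momentum plus RMSprop with bias correction). It is illustrative only, not the exact deepNet code; the moment defaults beta1=0.9, beta2=0.99 match the B1, B2 values reported in the training logs below.
# Minimal Adam update sketch (illustrative; not the exact deepNet code)
# v, s: first- and second-moment estimates; t: step count (for bias correction)
def adam_update(W, dW, v, s, t, alpha=0.01, beta1=0.9, beta2=0.99, eps=1e-8):
    v = beta1*v + (1 - beta1)*dW              # Momentum (first moment)
    s = beta2*s + (1 - beta2)*dW**2           # RMSprop (second moment)
    v_hat = v/(1 - beta1**t)                  # bias-corrected estimates
    s_hat = s/(1 - beta2**t)
    W = W - alpha*v_hat/(np.sqrt(s_hat) + eps)
    return W, v, s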
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
# DL library (code included)
from DeepNet import deepNet
# Toy Datasets (simulated)
import DataSet as ds
# Other datasets
from sklearn import datasets
Data
dtype = ['MOONS','GAUSSIANS','LINEAR','SINUSOIDAL','SPIRAL']
# Moons data
# Training: N=100 examples and no noise
Xr, yr, _ = ds.create_dataset(100, dtype[0], noise=0.0, varargin='PRESET')
# Testing: N=100 examples and 10% noise
Xs, ys, _ = ds.create_dataset(100, dtype[0], noise=0.1, varargin='PRESET')
print(Xr.shape, yr.shape,Xs.shape, ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
(2, 100) (1, 100) (2, 100) (1, 100)
#Features:  2
#Examples:  100
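Note the layout convention used throughout: X is (features, examples) and y is (1, examples), i.e. examples are columns. If DataSet.py is not available, a comparable moons set can be built with scikit-learn (a sketch; make_moons returns (examples, features), so it is transposed here):
# Alternative moons data via scikit-learn (sketch, transposed to the
# (features, examples) layout expected by deepNet)
Xm, ym = datasets.make_moons(n_samples=100, noise=0.1)
Xm, ym = Xm.T, ym[None, :]    # -> (2, 100) and (1, 100)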
Neural Network :: Hidden Layers : [3,4]
NN = deepNet(X=Xr,y=yr,Xts=Xs, yts=ys, Net = [3,4],NetAf =['tanh'], alpha=0.01,
miniBatchSize = 0.3,printCostAt =20,AdamOpt=True,lambd=0,keepProb =[1.0])
#Classes : 2  #Features : 2  #Examples : 100
Network  : [2, 3, 4, 1]
ActiFun  : ['tanh', 'tanh', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0]
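For reference, the forward pass through this [2, 3, 4, 1] network is just repeated affine maps and activations. A sketch with hypothetical, randomly initialized parameters (deepNet's actual initialization is not shown here):
# Forward-pass sketch for Network [2, 3, 4, 1] with ['tanh', 'tanh', 'sig']
# (illustrative shapes only; W1..b3 are hypothetical parameters)
rng = np.random.default_rng(0)
W1, b1 = rng.standard_normal((3, 2))*0.1, np.zeros((3, 1))
W2, b2 = rng.standard_normal((4, 3))*0.1, np.zeros((4, 1))
W3, b3 = rng.standard_normal((1, 4))*0.1, np.zeros((1, 1))
A1 = np.tanh(W1 @ Xr + b1)                   # hidden layer 1: (3, N)
A2 = np.tanh(W2 @ A1 + b2)                   # hidden layer 2: (4, N)
A3 = 1/(1 + np.exp(-(W3 @ A2 + b3)))         # sigmoid output: (1, N)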
Training and plotting
%matplotlib notebook
fig1=plt.figure(1,figsize=(8,4))
fig2=plt.figure(2,figsize=(8,5))
for i in range(20):                          ## 20 times
    NN.fit(itr=10)                           ## itr=10 iterations each time
    NN.PlotLCurve(pause=0)
    fig1.canvas.draw()
    NN.PlotBoundries(Layers=True,pause=0)
    fig2.canvas.draw()
NN.PlotLCurve()
NN.PlotBoundries(Layers=True)
print(NN)
yri,yrp = NN.predict(Xr)
ysi,ysp = NN.predict(Xs)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
Epoc @ 20  : Training Cost 5.451194e-01   Testing Cost 6.252184e-01
Epoc @ 40  : Training Cost 5.325943e-01   Testing Cost 5.949402e-01
Epoc @ 60  : Training Cost 5.221049e-01   Testing Cost 5.588211e-01
Epoc @ 80  : Training Cost 5.134952e-01   Testing Cost 5.515395e-01
Epoc @ 100 : Training Cost 4.883630e-01   Testing Cost 5.397175e-01
Epoc @ 120 : Training Cost 4.773322e-01   Testing Cost 5.394530e-01
Epoc @ 140 : Training Cost 4.499698e-01   Testing Cost 5.214815e-01
Epoc @ 160 : Training Cost 4.343921e-01   Testing Cost 4.811901e-01
Epoc @ 180 : Training Cost 3.963799e-01   Testing Cost 4.621540e-01
Epoc @ 200 : Training Cost 3.614711e-01   Testing Cost 4.280268e-01
-------------Info---------------
#Classes : 2  #Features : 2  #Examples : 100
Network  : [2, 3, 4, 1]
ActiFun  : ['tanh', 'tanh', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 85.0
Testing Accuracy  :: 80.0
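The printed costs are consistent with a cross-entropy loss on the sigmoid output; a minimal sketch of the binary case (an assumption about deepNet's internals, stated only to make the numbers interpretable):
# Binary cross-entropy cost sketch (illustrative; A is the sigmoid output, shape (1, N))
def bce_cost(A, y, eps=1e-12):
    N = y.shape[1]
    return -np.sum(y*np.log(A + eps) + (1 - y)*np.log(1 - A + eps))/N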
plt.close(fig1)
plt.close(fig2)
Data
dtype = ['MOONS','GAUSSIANS','LINEAR','SINUSOIDAL','SPIRAL']
# Training: N=200 examples and no noise
Xr, yr, _ = ds.create_dataset(200, dtype[3], noise=0.0, varargin='PRESET')
# Testing: N=200 examples and 10% noise
Xs, ys, _ = ds.create_dataset(200, dtype[3], noise=0.1, varargin='PRESET')
print(Xr.shape, yr.shape,Xs.shape, ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
(2, 200) (1, 200) (2, 200) (1, 200)
#Features:  2
#Examples:  200
Neural Network :: Hidden Layers : [8,8,5]
NN = deepNet(X=Xr,y=yr,Xts=Xs, yts=ys, Net = [8,8,5],NetAf =['tanh'], alpha=0.01,
miniBatchSize = 0.3, printCostAt =100, AdamOpt=True,lambd=0,keepProb =[1.0])
#Classes : 2  #Features : 2  #Examples : 200
Network  : [2, 8, 8, 5, 1]
ActiFun  : ['tanh', 'tanh', 'tanh', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Training and plotting
%matplotlib notebook
plt.close(fig1)
plt.close(fig2)
fig1=plt.figure(1,figsize=(8,4))
fig2=plt.figure(2,figsize=(8,5))
for i in range(20):                          ## 20 times
    NN.fit(itr=10)                           ## itr=10 iterations each time
    NN.PlotLCurve(pause=0)
    fig1.canvas.draw()
    NN.PlotBoundries(Layers=True,pause=0)
    fig2.canvas.draw()
NN.PlotLCurve()
NN.PlotBoundries(Layers=True)
print(NN)
yri,yrp = NN.predict(Xr)
ysi,ysp = NN.predict(Xs)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
Epoc @ 100 : Training Cost 1.819583e-01   Testing Cost 6.126728e-01
Epoc @ 200 : Training Cost 9.083479e-02   Testing Cost 6.960079e-01
-------------Info---------------
#Classes : 2  #Features : 2  #Examples : 200
Network  : [2, 8, 8, 5, 1]
ActiFun  : ['tanh', 'tanh', 'tanh', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 97.0
Testing Accuracy  :: 86.0
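The gap between training cost (~0.09) and testing cost (~0.70) indicates this deeper network is overfitting the noise-free training set. The deepNet constructor already exposes L2 (lambd) and dropout (keepProb); a sketch with illustrative, untuned values:
# Sketch: same network with L2 penalty and dropout enabled
# (lambd=0.1 and keepProb=[0.8] are illustrative values, not tuned)
NN = deepNet(X=Xr, y=yr, Xts=Xs, yts=ys, Net=[8,8,5], NetAf=['tanh'], alpha=0.01,
             miniBatchSize=0.3, printCostAt=100, AdamOpt=True, lambd=0.1, keepProb=[0.8])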
plt.close(fig1)
plt.close(fig2)
Data (70-30 split)
X, y = ds.mclassGaus(N=500, nClasses = 4,var =0.25,ShowPlot=False)
[n,N] =X.shape
r = np.random.permutation(N)
split =int(0.7*N)
Xr = X[:,r[:split]]
yr = y[:,r[:split]]
Xs = X[:,r[split:]]
ys = y[:,r[split:]]
print(Xr.shape, yr.shape,Xs.shape,ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
(2, 2000) (1, 2000)
(2, 1400) (1, 1400) (2, 600) (1, 600)
#Features:  2
#Examples:  1400
Neural Network :: Hidden Layers : [8,8,5]
NN = deepNet(X=Xr,y=yr,Xts=Xs, yts=ys, Net = [8,8,5],NetAf =['tanh'], alpha=0.01,
miniBatchSize = 0.3,printCostAt =-1,AdamOpt=True,lambd=0,keepProb =[1.0])
1 1400 4
1 600 4
#Classes : 4  #Features : 2  #Examples : 1400
Network  : [2, 8, 8, 5, 4]
ActiFun  : ['tanh', 'tanh', 'tanh', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
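With four classes the output layer switches to softmax. A numerically stable softmax sketch over a (n_classes, N) pre-activation Z (illustrative, not deepNet's internal code):
# Numerically stable softmax sketch
def softmax(Z):
    Zs = Z - np.max(Z, axis=0, keepdims=True)   # subtract column max to avoid overflow
    E = np.exp(Zs)
    return E/np.sum(E, axis=0, keepdims=True)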
plt.close(fig1)
plt.close(fig2)
fig1=plt.figure(1,figsize=(8,4))
fig2=plt.figure(2,figsize=(8,5))
for i in range(20):                          ## 20 times
    NN.fit(itr=10)                           ## itr=10 iterations each time
    NN.PlotLCurve(pause=0)
    fig1.canvas.draw()
    NN.PlotBoundries(Layers=True,pause=0)
    fig2.canvas.draw()
NN.PlotLCurve()
NN.PlotBoundries(Layers=True)
print(NN)
yri,yrp = NN.predict(Xr)
ysi,ysp = NN.predict(Xs)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
-------------Info---------------
#Classes : 4  #Features : 2  #Examples : 1400
Network  : [2, 8, 8, 5, 4]
ActiFun  : ['tanh', 'tanh', 'tanh', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 97.07142857142857
Testing Accuracy  :: 97.66666666666667
plt.close(fig1)
plt.close(fig2)
print(Xr.shape, yr.shape,Xs.shape,ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
NN = deepNet(X=Xr,y=yr,Xts=Xs, yts=ys, Net = [8,8,5],NetAf =['relu'], alpha=0.01,
miniBatchSize = 0.3,printCostAt =-1,AdamOpt=True,lambd=0,keepProb =[1.0])
plt.close(fig1)
plt.close(fig2)
fig1=plt.figure(1,figsize=(8,4))
fig2=plt.figure(2,figsize=(8,5))
for i in range(20):                          ## 20 times
    NN.fit(itr=10)                           ## itr=10 iterations each time
    NN.PlotLCurve(pause=0)
    fig1.canvas.draw()
    NN.PlotBoundries(Layers=True,pause=0)
    fig2.canvas.draw()
NN.PlotLCurve()
NN.PlotBoundries(Layers=True)
print(NN)
yri,yrp = NN.predict(Xr)
ysi,ysp = NN.predict(Xs)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
(2, 1400) (1, 1400) (2, 600) (1, 600)
#Features:  2
#Examples:  1400
1 1400 4
1 600 4
#Classes : 4  #Features : 2  #Examples : 1400
Network  : [2, 8, 8, 5, 4]
ActiFun  : ['relu', 'relu', 'relu', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
-------------Info---------------
#Classes : 4  #Features : 2  #Examples : 1400
Network  : [2, 8, 8, 5, 4]
ActiFun  : ['relu', 'relu', 'relu', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 97.42857142857143
Testing Accuracy  :: 97.5
plt.close(fig1)
plt.close(fig2)
Xy= datasets.load_digits()
X = Xy['data']
y = Xy['target']
print(X.shape, y.shape)
(1797, 64) (1797,)
fig=plt.figure(1,figsize=(10,1))
for i in range(10):
    plt.subplot(1,10,i+1)
    plt.imshow(X[i].reshape([8,8]),cmap='gray',aspect='auto')
    plt.title('y :' + str(y[i]))
    plt.axis('off')
plt.subplots_adjust(top=0.8,wspace=0.12, hspace=0)
plt.show()
plt.close(fig)
N =X.shape[0] # total examples
r = np.random.permutation(N)
split=int(0.7*N)
Xr = X[r[:split],:].T
yr = y[r[:split]][None,:]
Xs = X[r[split:],:].T
ys = y[r[split:]][None,:]
print(Xr.shape, yr.shape, Xs.shape, ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
(64, 1257) (1, 1257) (64, 540) (1, 540)
#Features:  64
#Examples:  1257
NN = deepNet(X = Xr,y=yr,Xts=Xs, yts=ys, Net = [8,8,5],NetAf =['relu'], alpha=0.01,
miniBatchSize = 0.3, printCostAt =10,AdamOpt=True,lambd=0,keepProb =[1.0])
1 1257 10
1 540 10
#Classes : 10  #Features : 64  #Examples : 1257
Network  : [64, 8, 8, 5, 10]
ActiFun  : ['relu', 'relu', 'relu', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
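The '1 1257 10' line suggests the (1, 1257) integer labels are expanded to 10 classes internally. A minimal one-hot sketch of that conversion (an assumption about deepNet's internals; labels 0..9):
# One-hot encoding sketch: (1, N) integer labels -> (n_classes, N)
def one_hot(y, n_classes):
    Y = np.zeros((n_classes, y.shape[1]))
    Y[y.ravel().astype(int), np.arange(y.shape[1])] = 1.0
    return Y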
plt.close(fig)
fig=plt.figure(1,figsize=(8,4))
for i in range(100):
    NN.fit(itr=2)
    NN.PlotLCurve(pause=0)
    fig.canvas.draw()
NN.PlotLCurve()
print(NN)
yri,yrp = NN.predict(Xr)
ysi,ysp = NN.predict(Xs)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
Epoc @ 10  : Training Cost 1.669677e+00   Testing Cost 1.676300e+00
Epoc @ 20  : Training Cost 1.163774e+00   Testing Cost 1.207048e+00
Epoc @ 30  : Training Cost 8.466815e-01   Testing Cost 8.878032e-01
Epoc @ 40  : Training Cost 6.114897e-01   Testing Cost 6.825550e-01
Epoc @ 50  : Training Cost 4.852754e-01   Testing Cost 5.607571e-01
Epoc @ 60  : Training Cost 4.209391e-01   Testing Cost 5.582497e-01
Epoc @ 70  : Training Cost 3.606742e-01   Testing Cost 4.806711e-01
Epoc @ 80  : Training Cost 3.139481e-01   Testing Cost 4.311313e-01
Epoc @ 90  : Training Cost 2.980853e-01   Testing Cost 4.202055e-01
Epoc @ 100 : Training Cost 3.096001e-01   Testing Cost 4.799720e-01
Epoc @ 110 : Training Cost 2.499592e-01   Testing Cost 4.405643e-01
Epoc @ 120 : Training Cost 2.412105e-01   Testing Cost 4.029100e-01
Epoc @ 130 : Training Cost 2.904016e-01   Testing Cost 5.736184e-01
Epoc @ 140 : Training Cost 3.113402e-01   Testing Cost 5.247168e-01
Epoc @ 150 : Training Cost 2.727288e-01   Testing Cost 5.830139e-01
Epoc @ 160 : Training Cost 2.421917e-01   Testing Cost 5.464511e-01
Epoc @ 170 : Training Cost 2.423085e-01   Testing Cost 4.962994e-01
Epoc @ 180 : Training Cost 2.507808e-01   Testing Cost 5.089880e-01
Epoc @ 190 : Training Cost 1.873999e-01   Testing Cost 5.319164e-01
Epoc @ 200 : Training Cost 2.001438e-01   Testing Cost 5.730558e-01
-------------Info---------------
#Classes : 10  #Features : 64  #Examples : 1257
Network  : [64, 8, 8, 5, 10]
ActiFun  : ['relu', 'relu', 'relu', 'softmax']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 92.442322991249
Testing Accuracy  :: 87.22222222222223
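Training cost keeps falling while testing cost bottoms out near epoch 120 and then drifts upward, a classic early-stopping signal. The incremental-fit loop above could be restructured to track the best test accuracy as training proceeds; a sketch using only the NN.fit/NN.predict calls already shown (illustrative; run as-is it would continue training the already-fitted NN):
# Early-stopping bookkeeping sketch
best_acc, best_at = 0.0, 0
for i in range(100):
    NN.fit(itr=2)
    ysi, _ = NN.predict(Xs)
    acc = 100*np.sum(ysi == ys)/ysi.shape[1]
    if acc > best_acc:
        best_acc, best_at = acc, 2*(i + 1)   # iterations completed so far
print('Best test accuracy', best_acc, 'after', best_at, 'iterations')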
plt.close(fig)
Xy = datasets.load_breast_cancer()
X = Xy['data']
y = Xy['target']
print(X.shape, y.shape)
(569, 30) (569,)
N =X.shape[0] # total examples
r = np.random.permutation(N)
split=int(0.7*N)
Xr = X[r[:split],:].T
yr = y[r[:split]][None,:]
Xs = X[r[split:],:].T
ys = y[r[split:]][None,:]
print(Xr.shape, yr.shape, Xs.shape, ys.shape)
print('#Features: ',Xr.shape[0])
print('#Examples: ',Xr.shape[1])
(30, 398) (1, 398) (30, 171) (1, 171)
#Features:  30
#Examples:  398
Normalizing Data (z-score with training-set mean and SD, applied to both train and test to avoid leakage)
mn = np.mean(Xr,axis=1)[:,None]
sd = np.std(Xr,axis=1)[:,None]
Xrn=(Xr-mn)/sd
Xsn=(Xs-mn)/sd
print(Xrn.shape, yr.shape, Xsn.shape, ys.shape)
(30, 398) (1, 398) (30, 171) (1, 171)
NN = deepNet(X = Xrn,y=yr,Xts=Xsn, yts=ys, Net = [8,8,5],NetAf =['relu'], alpha=0.01,
miniBatchSize = 0.3, printCostAt =10,AdamOpt=True,lambd=0,keepProb =[1.0])
#Classes : 2  #Features : 30  #Examples : 398
Network  : [30, 8, 8, 5, 1]
ActiFun  : ['relu', 'relu', 'relu', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
plt.close(fig)
fig=plt.figure(1,figsize=(8,4))
for i in range(100):
    NN.fit(itr=2)
    NN.PlotLCurve(pause=0)
    fig.canvas.draw()
NN.PlotLCurve()
print(NN)
yri,yrp = NN.predict(Xrn)
ysi,ysp = NN.predict(Xsn)
print('Training Accuracy ::',100*np.sum(yri==yr)/yri.shape[1])
print('Testing Accuracy ::',100*np.sum(ysi==ys)/ysi.shape[1])
Epoc @ 10  : Training Cost 9.471268e-02   Testing Cost 1.207643e-01
Epoc @ 20  : Training Cost 3.918440e-02   Testing Cost 9.051653e-02
Epoc @ 30  : Training Cost 1.906984e-02   Testing Cost 1.128862e-01
Epoc @ 40  : Training Cost 1.213079e-02   Testing Cost 1.504232e-01
Epoc @ 50  : Training Cost 8.890949e-03   Testing Cost 1.936689e-01
Epoc @ 60  : Training Cost 5.657525e-03   Testing Cost 2.314398e-01
Epoc @ 70  : Training Cost 1.394953e-03   Testing Cost 4.292456e-01
Epoc @ 80  : Training Cost 1.233529e-04   Testing Cost 2.726136e-01
Epoc @ 90  : Training Cost 2.483784e-05   Testing Cost 3.341603e-01
Epoc @ 100 : Training Cost 1.304866e-05   Testing Cost 3.485662e-01
Epoc @ 110 : Training Cost 8.223927e-06   Testing Cost 3.671845e-01
Epoc @ 120 : Training Cost 6.269460e-06   Testing Cost 3.721386e-01
Epoc @ 130 : Training Cost 5.191484e-06   Testing Cost 3.768768e-01
Epoc @ 140 : Training Cost 4.285282e-06   Testing Cost 3.828834e-01
Epoc @ 150 : Training Cost 3.736508e-06   Testing Cost 3.891559e-01
Epoc @ 160 : Training Cost 3.149952e-06   Testing Cost 3.941071e-01
Epoc @ 170 : Training Cost 2.818114e-06   Testing Cost 3.951910e-01
Epoc @ 180 : Training Cost 2.527101e-06   Testing Cost 3.974741e-01
Epoc @ 190 : Training Cost 2.286213e-06   Testing Cost 4.008715e-01
Epoc @ 200 : Training Cost 2.137257e-06   Testing Cost 4.023615e-01
-------------Info---------------
#Classes : 2  #Features : 30  #Examples : 398
Network  : [30, 8, 8, 5, 1]
ActiFun  : ['relu', 'relu', 'relu', 'sig']
keepProb : [1.0, 1.0, 1.0, 1.0, 1.0]
Alpha    : 0.01
B1, B2   : 0.9 0.99
lambd    : 0
AdamOpt  : True
---------------------------
Training Accuracy :: 100.0
Testing Accuracy  :: 95.90643274853801
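With training cost driven below 1e-5 while testing cost climbs, the network has memorized the training set. Beyond a single accuracy number, a confusion matrix shows where the remaining test errors fall; a NumPy-only sketch using the ysi label predictions above:
# Confusion-matrix sketch from predicted labels (rows: true class, cols: predicted)
def confusion(y_true, y_pred, n_classes=2):
    C = np.zeros((n_classes, n_classes), dtype=int)
    for t, p in zip(y_true.ravel().astype(int), y_pred.ravel().astype(int)):
        C[t, p] += 1
    return C
print(confusion(ys, ysi))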
plt.close(fig)
Nikesh Bajaj
Email: n.bajaj@qmul.ac.uk | bajaj.nikkey@gmail.com
http://nikeshbajaj.in