import tensorflow as tf
from tensorflow import keras
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import time
%config IPCompleter.greedy=True
%config Completer.use_jedi = False
from os.path import abspath
path = 'F:\\UNIVERSITY\\4TH_SEM\\Assignments\\LaTeX Report\\figures\\'
def saveto(filename):
    plt.savefig(path + filename)
def saveimg(filename, image):
    cv.imwrite(path + filename, image)
def sigmoid(hypothesis):
    return 1/(1 + np.exp(-hypothesis))
def getAccuracy(predictions, labels):
    pred_class = np.argmax(predictions, axis=1)
    real_class = np.argmax(labels, axis=1)
    valid_pred = (pred_class == real_class)
    return np.sum(valid_pred)/len(real_class) # Fraction correct, in [0, 1]
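A quick, illustrative sanity check of getAccuracy (toy values, not CIFAR data): two of the three one-hot labels match the arg-max of the scores, so the helper returns 2/3.
# Toy check of getAccuracy: predictions pick classes [1, 0, 1], labels are [1, 0, 0].
demo_scores = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # predicted class scores
demo_labels = np.array([[0, 1], [1, 0], [1, 0]])              # one-hot ground truth
print(getAccuracy(demo_scores, demo_labels))                  # 0.666...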
# Loading the Data Set
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape); print('y_train: ', y_train.shape)
print("Pre-processing loaded data...\n")
# y_train contains labels from 0 to 9 corresponding to 10 classes.
K = len(np.unique(y_train)) # Number of Classes
Ntr = x_train.shape[0]; print('Number of training samples:', Ntr) # Number of training samples 50,000
Nte = x_test.shape[0]; print('Number of test samples: ',Nte,'\n')# Number of test samples 10,000
Din = 3072 # CIFAR10 # 32x32x3 = height x width x channel
# Normalize pixel values: Image data preprocessing
x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0) # axis=0: mean of a column; Mean of each pixel
x_train = x_train - mean_image
x_test = x_test - mean_image
# Convert class vectors to binary class matrices.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K); print('y_train: ', y_train.shape); #print(y_train[0:10,:])
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K);
x_train = np.reshape(x_train,(Ntr,Din)).astype('float32')
x_test = np.reshape(x_test,(Nte,Din)).astype('float32')
print('Reshaped x_train: ', x_train.shape)
print('Reshaped x_test: ', x_test.shape)
print("Pre-processing completed.")
x_train:  (50000, 32, 32, 3)
y_train:  (50000, 1)
Pre-processing loaded data...

Number of training samples: 50000
Number of test samples:  10000

y_train:  (50000, 10)
Reshaped x_train:  (50000, 3072)
Reshaped x_test:  (10000, 3072)
Pre-processing completed.
A part of the code for a linear classifier for CIFAR-10 is given in Listing 1. For our linear classifier, the score function is f(x) = Wx + b, and the loss function is the mean sum of squared errors. [3 marks]
(Hint: If your loss explodes, reduce the learning rate.)
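For reference, the regularized mean-squared-error loss and its gradient, written to match the vectorised code below (X is the bias-augmented 50000 x 3073 training matrix, Y the one-hot label matrix, and the regularization weight lambda is the variable reg), are:
\[
L(W) \;=\; \frac{1}{2m}\,\lVert XW - Y \rVert_F^{2} \;+\; \frac{\lambda}{2m}\,\lVert W \rVert_F^{2},
\qquad
\nabla_W L \;=\; \frac{1}{m}\,X^{\top}(XW - Y) \;+\; \frac{\lambda}{m}\,W .
\]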
print("Initializing the weight matrix with random weights...")
std=1e-5 # For random samples from N(\mu, \sigma^2), use: sigma * np.random.randn(...) + mu
w1 = std*np.random.randn(Din, K) # Initializing the weight matrix with random weights
b1 = np.zeros(K) # Initializing the bias vector
print("w1:", w1.shape);print("b1:", b1.shape)
# Keeping track of two sets of parameters w1 and b1 separately is not really efficient.
# This can be eliminated by combining both of them into one single matrix as follows.
# Additionally, a bias term '1' must be added in front of each image row for this to work,
# i.e. to enable matrix multiplication.
print("Rearranging train and test samples...")
# Rearranging train and test samples: (ra=rearranged)
x_train_ra = np.concatenate((np.ones((x_train.shape[0],1)),x_train), axis=1); print('Rearranged x_train: ', x_train_ra.shape)
x_test_ra = np.concatenate((np.ones((x_test.shape[0],1)),x_test), axis=1)
# Rearranging weight matrix and bias matrix into single matrix
w1 = np.concatenate((b1.reshape(1,K), w1), axis=0); print('Rearranged w1: ',w1.shape)
print("Rearranging completed.\n")
#------------------------------------------------------------------------------------------
iterations = 300 # Gradient descent iterations
lr = 1.4e-2 # Learning rate
lr_decay= 0.999
reg = 5e-6
loss_history = [] # Values of loss function at each iteration
test_loss = []
train_acc_history = [] # Training accuracy
val_acc_history = [] # Validation accuracy
m = x_train.shape[0] # Number of training examples
m2 = x_test_ra.shape[0]
# Running gradient descent the number of times specified in iterations
print("Running gradient descent...")
for t in range(1,iterations+1):
    # Forward Propagation
    hypothesis = x_train_ra.dot(w1)
    loss = (1/(2*m))*np.sum((hypothesis - y_train)**2) + (1/(2*m))*reg*np.sum(w1**2)
    loss_history.append(loss)
    # Backward Propagation
    dw1 = (1/m)*(x_train_ra.T.dot(hypothesis - y_train)) + (1/m)*reg*w1
    w1 = w1 - lr*dw1
    # Training Accuracy and Validation Accuracy
    train_acc = getAccuracy(hypothesis, y_train)
    train_acc_history.append(train_acc)
    valid_acc = getAccuracy(x_test_ra.dot(w1), y_test)
    val_acc_history.append(valid_acc)
    # Test Loss
    t_loss = (1/(2*m2))*np.sum((x_test_ra.dot(w1) - y_test)**2) + (1/(2*m2))*reg*np.sum(w1**2)
    test_loss.append(t_loss)
    # Print details for selected iterations
    if (t%30==0) or (t==1):
        print("| Epoch {:03} | Loss {:.4f} | accuracy: {:.4f} | val_loss: {:.4f} | val_accuracy: {:.4f} |"\
              .format(t, loss, train_acc, t_loss, valid_acc))
    # Decaying learning rate
    lr = lr*lr_decay
print("Gradient Descent completed. Parameters were trained")
Initializing the weight matrix with random weights...
w1: (3072, 10)
b1: (10,)
Rearranging train and test samples...
Rearranged x_train:  (50000, 3073)
Rearranged w1:  (3073, 10)
Rearranging completed.

Running gradient descent...
| Epoch 001 | Loss 0.5000 | accuracy: 0.0842 | val_loss: 0.4846 | val_accuracy: 0.2485 |
| Epoch 030 | Loss 0.4297 | accuracy: 0.3648 | val_loss: 0.4287 | val_accuracy: 0.3640 |
| Epoch 060 | Loss 0.4126 | accuracy: 0.3810 | val_loss: 0.4123 | val_accuracy: 0.3814 |
| Epoch 090 | Loss 0.4049 | accuracy: 0.3896 | val_loss: 0.4049 | val_accuracy: 0.3889 |
| Epoch 120 | Loss 0.4010 | accuracy: 0.3954 | val_loss: 0.4012 | val_accuracy: 0.3921 |
| Epoch 150 | Loss 0.3987 | accuracy: 0.3993 | val_loss: 0.3992 | val_accuracy: 0.3938 |
| Epoch 180 | Loss 0.3973 | accuracy: 0.4020 | val_loss: 0.3980 | val_accuracy: 0.3953 |
| Epoch 210 | Loss 0.3964 | accuracy: 0.4047 | val_loss: 0.3972 | val_accuracy: 0.3962 |
| Epoch 240 | Loss 0.3957 | accuracy: 0.4065 | val_loss: 0.3966 | val_accuracy: 0.3960 |
| Epoch 270 | Loss 0.3951 | accuracy: 0.4086 | val_loss: 0.3962 | val_accuracy: 0.3970 |
| Epoch 300 | Loss 0.3946 | accuracy: 0.4103 | val_loss: 0.3958 | val_accuracy: 0.3982 |
Gradient Descent completed. Parameters were trained
# ------Plotting learning rate, training and testing loss and accuracies-------
#fig, axes = plt.subplots(1,2, sharex='all', sharey='all', figsize=(20,7))
plt.figure(figsize=(20,7))
plt.plot(loss_history/np.max(loss_history), linewidth=3, label = 'Train Loss')
plt.plot(test_loss/np.max(test_loss), linewidth=3, label = 'Test Loss')
plt.plot(train_acc_history, linewidth=3, label = "Training Accuracy")
plt.plot(val_acc_history, linewidth=3, label = "Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy and Normalized Loss')
plt.legend(loc='lower right')
# saveto("part1plots.eps")
# items = {"Train Loss":loss_history, "Test Loss":test_loss, "Training Accuracy":train_acc_history,\
# "Validation Accuracy": val_acc_history}
# location = 1
# for key in items.keys():
# plt.subplot(1,4,location);plt.plot(items[key], color='#0000ff', linewidth=4)
# plt.title(key)
# location+=1
# -------------------Showing the weights matrix W1 as 10 images-----------------
weights = w1[1:,] # Removing the row of bias terms.
weights_pos = weights- np.min(weights)# Making the minimum weight zero.
images = ((weights_pos/np.max(weights_pos))*255).astype('uint8')
CIFAR10 = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
fig, axes = plt.subplots(2,5, sharex='all', sharey='all', figsize=(25,10))
location = 1 # Location of the image in the grid of 2x5
for i in range(K):
    image = images[:,i].reshape(32,32,3)
    plt.subplot(2,5,location), plt.imshow(image[:,:,::-1])
    plt.title("Class: {}".format(CIFAR10[i])), plt.xticks([]), plt.yticks([])
    #saveimg("Reg Image "+ str(i)+".jpg", image)
    location += 1
# saveto("trainedWeightsp1.eps")
Code a two-layer fully connected network with H = 200 hidden nodes. Choose the sigmoid function as the activation function for the hidden nodes. The output layer has no activation function. [3 marks]
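Written to match the code below (after the bias rows are absorbed into w1 and w2), the forward pass and the sigmoid derivative used in backpropagation are:
\[
h \;=\; \sigma(xW_1), \qquad f(x) \;=\; [\,1,\; h\,]\,W_2, \qquad
\sigma(z) \;=\; \frac{1}{1+e^{-z}}, \qquad
\sigma'(z) \;=\; \sigma(z)\bigl(1-\sigma(z)\bigr),
\]
so the factor hypo*(1 - hypo) in the backward pass is exactly the derivative of the sigmoid evaluated at the hidden-layer pre-activations.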
# Loading the Data Set
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape); print('y_train: ', y_train.shape)
#print(y_train[0:10])
print("Pre-processing loaded data...\n")
# y_train contains labels from 0 to 9 corresponding to 10 classes.
K = len(np.unique(y_train)) # Number of Classes
Ntr = x_train.shape[0]; print('Number of training samples:', Ntr) # Number of training samples 50,000
Nte = x_test.shape[0]; print('Number of test samples: ',Nte,'\n') # Number of test samples 10,000
Din = 3072 # CIFAR10 # 32x32x3 = height x width x channel
# Image data preprocessing
"""
Remove the normalization. Otherwise the model will not learn.
Because when the weights are extrememly small,
weight matrix will consist of almost the same elements.
and learning will stop.
"""
#x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0) # axis=0: mean of a column; Mean of each pixel
x_train = x_train - mean_image
x_test = x_test - mean_image
# Convert class vectors to binary class matrices.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K); print('y_train: ', y_train.shape); #print(y_train[0:10,:])
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K); #print(y_test[0:10,:])
x_train = np.reshape(x_train,(Ntr,Din)).astype('float32');# print(x_train[0:10, 0:20])
x_test = np.reshape(x_test,(Nte,Din)).astype('float32')
print('Reshaped x_train: ', x_train.shape)
print('Reshaped x_test: ', x_test.shape)
print("Pre-processing completed.")
x_train:  (50000, 32, 32, 3)
y_train:  (50000, 1)
Pre-processing loaded data...

Number of training samples: 50000
Number of test samples:  10000

y_train:  (50000, 10)
Reshaped x_train:  (50000, 3072)
Reshaped x_test:  (10000, 3072)
Pre-processing completed.
H = 200 # No of hidden nodes
print("Initializing the weight matrix with random weights...")
std=1e-5 # For random samples from N(\mu, \sigma^2), use: sigma * np.random.randn(...) + mu
# Hidden Layer
w1 = std*np.random.randn(Din, H) # Initializing the weight matrix with random weights
b1 = np.zeros(H) # Initializing the bias vector
print("w1:", w1.shape);print("b1:", b1.shape)
# Last Layer
w2 = std*np.random.randn(H, K) # Initializing the weight matrix with random weights
b2 = np.zeros(K) # Initializing the bias vector
print("w2:", w2.shape);print("b2:", b2.shape)
print("Rearranging train and test samples...")
# Rearranging train and test samples: (ra=rearranged)
x_train_ra = np.concatenate((np.ones((x_train.shape[0],1)),x_train), axis=1); print('Rearranged x_train: ', x_train_ra.shape)
x_test_ra = np.concatenate((np.ones((x_test.shape[0],1)),x_test), axis=1)
# Rearranging weight matrices and bias vectors into single matrices
w1 = np.concatenate((b1.reshape(1,H), w1), axis=0); print('Rearranged w1: ',w1.shape)
w2 = np.concatenate((b2.reshape(1,K), w2), axis=0); print('Rearranged w2: ',w2.shape)
print("Rearranging completed.")
iterations = 300 # Gradient descent iterations
lr = 1.4e-2 # Learning rate
lr_decay= 0.999
reg = 5e-6
test_loss = []
loss_history = [] # Values of cost function at each iteration
train_acc_history = []
val_acc_history = []
m = x_train.shape[0] # Number of training examples
m2 = x_test_ra.shape[0]
# Running gradient descent the number of times specified in iterations
print("Running gradient descent...")
for t in range(1,iterations+1):
    # Forward Propagation
    hypo = sigmoid(x_train_ra.dot(w1)) # Layer 1 with sigmoid activation
    hypothesis = np.concatenate((np.ones((hypo.shape[0],1)),hypo), axis=1) # Rearranging for layer 2
    predict = hypothesis.dot(w2) # Layer 2
    loss = (1/(2*m))*np.sum((predict - y_train)**2)\
           + (1/(2*m))*reg*np.sum(w1**2) + (1/(2*m))*reg*np.sum(w2**2)
    loss_history.append(loss)
    # Back propagation: partial derivatives of the loss function
    # (dl/dw2) = (dl/dpredict)(dpredict/dw2)
    dpredict = (1/m)*(predict - y_train)
    dw2 = hypothesis.T.dot(dpredict) + (1/m)*reg*w2
    # (dl/dw1) = (dl/dh)(dh/dw1)
    # (dl/dw1) = (dl/dpredict)(dpredict/dh) * (dh/d(w1x))(d(w1x)/dw1)
    dh = dpredict.dot(w2[1:,].T) # Removing bias row: w2 (201x10) --> (200x10)
    dhdxw1 = hypo*(1 - hypo) # Using hypo (50000x200), i.e. before rearranging
    dw1 = x_train_ra.T.dot(dh*dhdxw1) + (1/m)*reg*w1
    # Gradient Descent
    w1 = w1 - lr*dw1
    w2 = w2 - lr*dw2
    # Training Accuracy
    train_acc = getAccuracy(predict, y_train)
    train_acc_history.append(train_acc)
    # Validation Accuracy
    test_hypo = sigmoid(x_test_ra.dot(w1))
    test_hypothesis = np.concatenate((np.ones((test_hypo.shape[0],1)),test_hypo), axis=1) # Rearranging for layer 2
    test_predict = test_hypothesis.dot(w2)
    valid_acc = getAccuracy(test_predict, y_test)
    val_acc_history.append(valid_acc)
    # Test Loss
    t_loss = (1/(2*m2))*np.sum((test_predict - y_test)**2)\
             + (1/(2*m2))*reg*np.sum(w1**2) + (1/(2*m2))*reg*np.sum(w2**2)
    test_loss.append(t_loss)
    # Print details for selected iterations
    if (t%30==0) or (t==1):
        print("| Epoch {:03} | Loss {:.4f} | accuracy: {:.4f} | val_loss: {:.4f} | val_accuracy: {:.4f} |"\
              .format(t, loss, train_acc, t_loss, valid_acc))
    # Decaying learning rate
    lr = lr*lr_decay
print("Gradient Descent completed. Parameters were trained")
Initializing the weight matrix with random weights...
w1: (3072, 200)
b1: (200,)
w2: (200, 10)
b2: (10,)
Rearranging train and test samples...
Rearranged x_train:  (50000, 3073)
Rearranged w1:  (3073, 200)
Rearranged w2:  (201, 10)
Rearranging completed.
Running gradient descent...
| Epoch 001 | Loss 0.5000 | accuracy: 0.1000 | val_loss: 0.4541 | val_accuracy: 0.1005 |
| Epoch 030 | Loss 0.4202 | accuracy: 0.2740 | val_loss: 0.4203 | val_accuracy: 0.2718 |
| Epoch 060 | Loss 0.4101 | accuracy: 0.3362 | val_loss: 0.4097 | val_accuracy: 0.3352 |
| Epoch 090 | Loss 0.4034 | accuracy: 0.3539 | val_loss: 0.4034 | val_accuracy: 0.3545 |
| Epoch 120 | Loss 0.3943 | accuracy: 0.3891 | val_loss: 0.3955 | val_accuracy: 0.3832 |
| Epoch 150 | Loss 0.3898 | accuracy: 0.4039 | val_loss: 0.3913 | val_accuracy: 0.4040 |
| Epoch 180 | Loss 0.3880 | accuracy: 0.4071 | val_loss: 0.3897 | val_accuracy: 0.4013 |
| Epoch 210 | Loss 0.3846 | accuracy: 0.4180 | val_loss: 0.3868 | val_accuracy: 0.4115 |
| Epoch 240 | Loss 0.3821 | accuracy: 0.4263 | val_loss: 0.3853 | val_accuracy: 0.4178 |
| Epoch 270 | Loss 0.3785 | accuracy: 0.4380 | val_loss: 0.3829 | val_accuracy: 0.4259 |
| Epoch 300 | Loss 0.3777 | accuracy: 0.4379 | val_loss: 0.3827 | val_accuracy: 0.4233 |
Gradient Descent completed. Parameters were trained
# ------Plotting learning rate, training and testing loss and accuracies-------
# fig, axes = plt.subplots(1,4, sharex='all', sharey='all', figsize=(30,6))
# items = {"Loss":loss_history, "Training Accuracy":train_acc_history,\
# "Validation Accuracy": val_acc_history, "Learning Rate":lr_hitory}
# location = 1
# for key in items.keys():
# plt.subplot(1,4,location);plt.plot(items[key], color='#0000ff', linewidth=3)
# plt.title(key)
# location+=1
plt.figure(figsize=(20,7))
plt.plot(loss_history/np.max(loss_history), linewidth=3, label = 'Train Loss')
plt.plot(test_loss/np.max(test_loss), linewidth=3, label = 'Test Loss')
plt.plot(train_acc_history, linewidth=3, label = "Training Accuracy")
plt.plot(val_acc_history, linewidth=3, label = "Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy and Normalized Loss')
plt.legend(loc='lower right')
# saveto("part2plots.eps")
# -------------------Showing the weights matrix W1.W2 as 10 images-----------------
weights = w1[1:,].dot(w2[1:,]) # Removing the rows of bias terms.
weights_pos = weights- np.min(weights)# Making the minimum weight zero.
images = ((weights_pos/np.max(weights_pos))*255).astype('uint8')
CIFAR10 = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
fig, axes = plt.subplots(2,5, sharex='all', sharey='all', figsize=(25,10))
location = 1 # Location of the image in the grid of 2x5
for i in range(K):
    image = images[:,i].reshape(32,32,3)
    plt.subplot(2,5,location), plt.imshow(image[:,:,::-1])
    plt.title("Class: {}".format(CIFAR10[i])), plt.xticks([]), plt.yticks([])
    # saveimg("Reg Image "+ str(i)+".jpg", image)
    location += 1
# saveto("trainedWeightsnn2.eps")
Modify the code in item 2 to carry out stochastic gradient descent with a batch size of 500. [2 marks]
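A minimal sketch (a hypothetical helper, not part of the marked solution below) of the shuffle-and-slice pattern the SGD loop uses; with Ntr = 50,000 and a batch size of 500, each epoch makes 100 parameter updates.
# Hypothetical mini-batch generator illustrating the pattern used in the loop below.
def minibatches(X, Y, batch_size, rng):
    # Shuffle once per epoch, then yield consecutive slices of the permutation.
    order = rng.permutation(X.shape[0])
    for start in range(0, X.shape[0], batch_size):
        pick = order[start:start + batch_size]
        yield X[pick], Y[pick]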
# Loading the Data Set
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape); print('y_train: ', y_train.shape)
#print(y_train[0:10])
print("Pre-processing loaded data...\n")
# y_train contains labels from 0 to 9 corresponding to 10 classes.
K = len(np.unique(y_train)) # Number of Classes
Ntr = x_train.shape[0]; print('Number of training samples:', Ntr) # Number of training samples 50,000
Nte = x_test.shape[0]; print('Number of test samples: ',Nte,'\n') # Number of test samples 10,000
Din = 3072 # CIFAR10 # 32x32x3 = height x width x channel
# Image data preprocessing
"""
Remove the normalization. Otherwise the model will not learn.
Because when the weights are extrememly small,
weight matrix will consist of almost the same elements.
and learning will stop.
"""
#x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0) # axis=0: mean of a column; Mean of each pixel
x_train = x_train - mean_image
x_test = x_test - mean_image
# Convert class vectors to binary class matrices.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K); print('y_train: ', y_train.shape); #print(y_train[0:10,:])
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K); #print(y_test[0:10,:])
x_train = np.reshape(x_train,(Ntr,Din)).astype('float32');# print(x_train[0:10, 0:20])
x_test = np.reshape(x_test,(Nte,Din)).astype('float32')
print('Reshaped x_train: ', x_train.shape)
print('Reshaped x_test: ', x_test.shape)
print("Pre-processing completed.")
x_train:  (50000, 32, 32, 3)
y_train:  (50000, 1)
Pre-processing loaded data...

Number of training samples: 50000
Number of test samples:  10000

y_train:  (50000, 10)
Reshaped x_train:  (50000, 3072)
Reshaped x_test:  (10000, 3072)
Pre-processing completed.
H = 200 # No of hidden nodes
print("Initializing the weight matrix with random weights...")
std=1e-5 # For random samples from N(\mu, \sigma^2), use: sigma * np.random.randn(...) + mu
# Hidden Layer
w1 = std*np.random.randn(Din, H) # Initializing the weight matrix with random weights
b1 = np.zeros(H) # Initializing the bias vector
print("w1:", w1.shape);print("b1:", b1.shape)
# Last Layer
w2 = std*np.random.randn(H, K) # Initializing the weight matrix with random weights
b2 = np.zeros(K) # Initializing the bias vector
print("w2:", w2.shape);print("b2:", b2.shape)
print("Rearranging train and test samples...")
# Rearranging train and test samples: (ra=rearranged)
x_train_ra = np.concatenate((np.ones((x_train.shape[0],1)),x_train), axis=1); print('Rearranged x_train: ', x_train_ra.shape)
x_test_ra = np.concatenate((np.ones((x_test.shape[0],1)),x_test), axis=1)
# Rearranging weight matrices and bias vectors into single matrices
w1 = np.concatenate((b1.reshape(1,H), w1), axis=0); print('Rearranged w1: ',w1.shape)
w2 = np.concatenate((b2.reshape(1,K), w2), axis=0); print('Rearranged w2: ',w2.shape)
print("Rearranging completed.")
iterations = 300 # Gradient descent iterations
lr = 1.4e-2 # Learning rate
lr_decay= 0.999
reg = 5e-6
test_loss = []
loss_history = [] # Values of cost function at each iteration
train_acc_history = []
val_acc_history = []
mini_batch_loss = []
m = x_train.shape[0] # Number of training examples
m2 = x_test.shape[0]
# Running gradient descent the number of times specified in iterations
print("Running stochastic gradient descent...")
beginat = time.time()
batch_size = 500
seed = 0
rng = np.random.default_rng(seed=seed)
for t in range(1,iterations+1):
    indices = np.arange(Ntr)
    rng.shuffle(indices)
    x_train_3 = x_train_ra[indices]
    y_train_3 = y_train[indices]
    batch_loss = 0
    for start in range(0,Ntr,batch_size):
        stop = start + batch_size
        # Forward Propagation
        hypo = sigmoid(x_train_3[start:stop].dot(w1)) # Layer 1 with sigmoid activation
        hypothesis = np.concatenate((np.ones((hypo.shape[0],1)),hypo), axis=1) # Rearranging for layer 2
        predict = hypothesis.dot(w2) # Layer 2
        minibatch_loss = (1/(2*m))*np.sum((predict - y_train_3[start:stop])**2)\
                         + (1/(2*m))*reg*np.sum(w1**2) + (1/(2*m))*reg*np.sum(w2**2)
        mini_batch_loss.append(minibatch_loss)
        batch_loss += minibatch_loss
        # Back propagation: partial derivatives of the loss function
        # (dl/dw2) = (dl/dpredict)(dpredict/dw2)
        dpredict = (1/m)*(predict - y_train_3[start:stop])
        dw2 = hypothesis.T.dot(dpredict) + (1/m)*reg*w2
        # (dl/dw1) = (dl/dh)(dh/dw1)
        # (dl/dw1) = (dl/dpredict)(dpredict/dh) * (dh/d(w1x))(d(w1x)/dw1)
        dh = dpredict.dot(w2[1:,].T) # Removing bias row: w2 (201x10) --> (200x10)
        dhdxw1 = hypo*(1 - hypo) # Using hypo (500x200), i.e. before rearranging
        dw1 = x_train_3[start:stop].T.dot(dh*dhdxw1) + (1/m)*reg*w1
        # Gradient Descent
        w1 = w1 - lr*dw1
        w2 = w2 - lr*dw2
    loss_history.append(batch_loss)
    # Training Accuracy
    hypo = sigmoid(x_train_3.dot(w1)) # Layer 1 with sigmoid activation
    hypothesis = np.concatenate((np.ones((hypo.shape[0],1)),hypo), axis=1) # Rearranging for layer 2
    predict = hypothesis.dot(w2) # Layer 2
    train_acc = getAccuracy(predict, y_train_3)
    train_acc_history.append(train_acc)
    # Validation Accuracy
    test_hypo = sigmoid(x_test_ra.dot(w1))
    test_hypothesis = np.concatenate((np.ones((test_hypo.shape[0],1)),test_hypo), axis=1) # Rearranging for layer 2
    test_predict = test_hypothesis.dot(w2)
    valid_acc = getAccuracy(test_predict, y_test)
    val_acc_history.append(valid_acc)
    # Test Loss
    t_loss = (1/(2*m2))*np.sum((test_predict - y_test)**2)\
             + (1/(2*m2))*reg*np.sum(w1**2) + (1/(2*m2))*reg*np.sum(w2**2)
    test_loss.append(t_loss)
    # Print details for selected iterations
    if (t%30==0) or (t==1):
        print("| Epoch {:03} | Loss {:.4f} | accuracy: {:.4f} | val_loss: {:.4f} | val_accuracy: {:.4f} |"\
              .format(t, batch_loss, train_acc, t_loss, valid_acc))
    #loss: 1.8916 - accuracy: 0.2991 - val_loss: 1.2849 - val_accuracy: 0.5374
    # Decaying learning rate
    lr = lr*lr_decay
endat = time.time()
duration = endat - beginat
print("Stochastic Gradient Descent completed in {} minutes {:.2f} seconds.Parameters were trained."\
.format(duration//60, duration%60))
Initializing the weight matrix with random weights...
w1: (3072, 200)
b1: (200,)
w2: (200, 10)
b2: (10,)
Rearranging train and test samples...
Rearranged x_train:  (50000, 3073)
Rearranged w1:  (3073, 200)
Rearranged w2:  (201, 10)
Rearranging completed.
Running stochastic gradient descent...
| Epoch 001 | Loss 0.4768 | accuracy: 0.1000 | val_loss: 0.4619 | val_accuracy: 0.1000 |
| Epoch 030 | Loss 0.4182 | accuracy: 0.2812 | val_loss: 0.4176 | val_accuracy: 0.2813 |
| Epoch 060 | Loss 0.4061 | accuracy: 0.3500 | val_loss: 0.4061 | val_accuracy: 0.3491 |
| Epoch 090 | Loss 0.3966 | accuracy: 0.3836 | val_loss: 0.3972 | val_accuracy: 0.3837 |
| Epoch 120 | Loss 0.3894 | accuracy: 0.4057 | val_loss: 0.3909 | val_accuracy: 0.4007 |
| Epoch 150 | Loss 0.3844 | accuracy: 0.4207 | val_loss: 0.3869 | val_accuracy: 0.4130 |
| Epoch 180 | Loss 0.3804 | accuracy: 0.4325 | val_loss: 0.3841 | val_accuracy: 0.4239 |
| Epoch 210 | Loss 0.3770 | accuracy: 0.4425 | val_loss: 0.3819 | val_accuracy: 0.4307 |
| Epoch 240 | Loss 0.3740 | accuracy: 0.4514 | val_loss: 0.3800 | val_accuracy: 0.4339 |
| Epoch 270 | Loss 0.3712 | accuracy: 0.4590 | val_loss: 0.3783 | val_accuracy: 0.4384 |
| Epoch 300 | Loss 0.3686 | accuracy: 0.4663 | val_loss: 0.3769 | val_accuracy: 0.4433 |
Stochastic Gradient Descent completed in 23.0 minutes 60.00 seconds. Parameters were trained.
# ----------------------------Plotting Minibatch loss--------------------------
plt.figure(figsize=(20,7))
plt.plot(mini_batch_loss); plt.title('Minibatch Loss')
plt.xlabel("Total Number of Iterations"); #saveto('part3minibloss.eps')
# ------Plotting learning rate, training and testing loss and accuracies-------
# fig, axes = plt.subplots(1,4, sharex='all', sharey='all', figsize=(30,6))
# items = {"Loss":loss_history, "Training Accuracy %":train_acc_history,\
# "Validation Accuracy %": val_acc_history, "Learning Rate":lr_hitory}
# location = 1
# for key in items.keys():
# plt.subplot(1,4,location);plt.plot(items[key], color='#0000ff', linewidth=3)
# plt.title(key); plt.xlabel('Number of Iterations')
# location+=1
plt.figure(figsize=(20,7))
plt.plot(loss_history/np.max(loss_history), linewidth=3, label = 'Train Loss')
plt.plot(test_loss/np.max(test_loss), linewidth=3, label = 'Test Loss')
plt.plot(train_acc_history, linewidth=3, label = "Training Accuracy")
plt.plot(val_acc_history, linewidth=3, label = "Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy and Normalized Loss')
plt.legend(loc='lower right')
# saveto("part3plots.eps")
# -------------------Showing the weights matrix W1.W2 as 10 images-----------------
weights = w1[1:,].dot(w2[1:,]) # Removing the rows of bias terms.
weights_pos = weights- np.min(weights)# Making the minimum weight zero.
images = ((weights_pos/np.max(weights_pos))*255).astype('uint8')
CIFAR10 = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
fig, axes = plt.subplots(2,5, sharex='all', sharey='all', figsize=(25,10))
location = 1 # Location of the image in the grid of 2x5
for i in range(K):
    image = images[:,i].reshape(32,32,3)
    plt.subplot(2,5,location), plt.imshow(image[:,:,::-1])
    plt.title("Class: {}".format(CIFAR10[i])), plt.xticks([]), plt.yticks([])
    # saveimg(CIFAR10[i] +".jpg", image)
    location += 1
# saveto("trainedWeightsnn2stochastic.eps")
Construct a CNN using keras.models.Sequential with the following configuration: C32, C64, C64, F64, F10. All three convolution layers are 3x3. Max pooling (2x2) follows each convolution layer. Use SGD (with momentum) with a batch size of 50 and CategoricalCrossentropy as the loss. [2 marks]
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
K = len(np.unique(y_train)) # Number of Classes
# Normalize pixel values: Image data preprocessing
x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0) # axis=0: mean of a column; Mean of each pixel
x_train = x_train - mean_image
x_test = x_test - mean_image
# Convert class vectors to binary class matrices.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)
#number of output channels for each Conv2D layer is controlled by the first argument
model = models.Sequential()
# As we go deeper into the model, height and width shrink,
# so we can increase the number of convolution channels.
# 32, 3x3 convolutions
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3), name='C32'))
model.add(layers.MaxPooling2D((2, 2)))
# 64, 3x3 convolutions
model.add(layers.Conv2D(64, (3, 3), activation='relu', name='C64_1'))
model.add(layers.MaxPooling2D((2, 2)))
# 64, 3x3 convolutions
model.add(layers.Conv2D(64, (3, 3), activation='relu', name='C64_2'))
model.add(layers.MaxPooling2D((2, 2)))
#feeding the last output tensor from the convolutional base
# (of shape (None, 2, 2, 64)) into one or more Dense layers
# Dense layers take vectors as input
model.add(layers.Flatten()) # Make the (None, 2, 2, 64) tensor flat
model.add(layers.Dense(64, activation='relu', name='F64'))
# CIFAR has 10 output classes, final Dense layer should have 10 outputs
model.add(layers.Dense(10, name='F10'))
# Complete architecture of the model
model.summary()
# An optimizer is one of the two arguments required for compiling a Keras model.
# A hyperparameter is a value used to control the learning process.
# momentum: float hyperparameter >= 0 that accelerates gradient descent
# in the relevant direction and dampens oscillations.
# Defaults to 0, i.e., vanilla gradient descent.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1.4e-2, momentum=0.9),
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
history = model.fit(x_train, y_train,
batch_size=50, epochs=10,
validation_data=(x_test, y_test))
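For reference, the update rule applied by tf.keras.optimizers.SGD when a momentum is set (and nesterov=False) is the classical heavy-ball form: velocity <- momentum * velocity - lr * gradient, followed by w <- w + velocity, here with momentum = 0.9 and lr = 1.4e-2 as passed to model.compile above.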
plt.figure(figsize=(20,7))
plt.plot(history.history['loss']/np.max(history.history['loss']), linewidth=3, label = 'Train Loss')
plt.plot(history.history['val_loss']/np.max(history.history['val_loss']), linewidth=3, label = 'Test Loss')
plt.plot(history.history['accuracy'], linewidth=3, label = "Training Accuracy")
plt.plot(history.history['val_accuracy'], linewidth=3, label = "Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy and Normalized Loss')
plt.legend(loc='lower right')
#saveto("part4plots.eps")
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(test_acc)
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= C32 (Conv2D) (None, 30, 30, 32) 896 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 15, 15, 32) 0 _________________________________________________________________ C64_1 (Conv2D) (None, 13, 13, 64) 18496 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64) 0 _________________________________________________________________ C64_2 (Conv2D) (None, 4, 4, 64) 36928 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 2, 2, 64) 0 _________________________________________________________________ flatten (Flatten) (None, 256) 0 _________________________________________________________________ F64 (Dense) (None, 64) 16448 _________________________________________________________________ F10 (Dense) (None, 10) 650 ================================================================= Total params: 73,418 Trainable params: 73,418 Non-trainable params: 0 _________________________________________________________________ Epoch 1/10 1000/1000 [==============================] - 58s 14ms/step - loss: 1.8814 - accuracy: 0.2973 - val_loss: 1.2735 - val_accuracy: 0.5427 Epoch 2/10 1000/1000 [==============================] - 8s 8ms/step - loss: 1.2343 - accuracy: 0.5598 - val_loss: 1.0891 - val_accuracy: 0.6179 Epoch 3/10 1000/1000 [==============================] - 8s 8ms/step - loss: 1.0269 - accuracy: 0.6372 - val_loss: 1.0110 - val_accuracy: 0.6507 Epoch 4/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.8947 - accuracy: 0.6852 - val_loss: 0.9615 - val_accuracy: 0.6698 Epoch 5/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.8072 - accuracy: 0.7176 - val_loss: 0.9093 - val_accuracy: 0.6863 Epoch 6/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.7252 - accuracy: 0.7482 - val_loss: 0.8807 - val_accuracy: 0.7078 Epoch 7/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.6745 - accuracy: 0.7618 - val_loss: 0.9267 - val_accuracy: 0.6909 Epoch 8/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.6278 - accuracy: 0.7780 - val_loss: 0.8539 - val_accuracy: 0.7137 Epoch 9/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.5793 - accuracy: 0.7968 - val_loss: 0.8795 - val_accuracy: 0.7131 Epoch 10/10 1000/1000 [==============================] - 8s 8ms/step - loss: 0.5406 - accuracy: 0.8113 - val_loss: 0.8898 - val_accuracy: 0.7134 313/313 - 9s - loss: 0.8898 - accuracy: 0.7134 0.7134000062942505