Logic Gate Neural Networks with Two Linear Neurons - Backpropagation

1. Layers with Forward and Backward

In [1]:
import numpy as np
import random
In [2]:
class AffineWithTwoInputs:
    def __init__(self):
        self.w = np.array([random.random(), random.random()])   # weights of the two inputs
        self.b = np.array([random.random()])  # bias
        self.x = None
        self.dw = None
        self.db = None
        
    def forward(self, x):
        self.x = x
        out = np.dot(self.w, self.x) + self.b
        return out

    def backward(self, din):
        if isinstance(din, np.ndarray) and din.size == 1:
            din = din.item()  # np.asscalar was removed from NumPy; item() is the replacement
        dx = np.dot(din, self.w.T)
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return dx

class AffineWithOneInput:
    def __init__(self):
        self.w = np.array([random.random()])   # weight of one input
        self.b = np.array([random.random()])   # bias
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(self.w, self.x) + self.b
        return out

    def backward(self, din):
        dx = np.dot(din, self.w.T)
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return dx
    
class Relu:
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        mask = (self.x <= 0)
        out = self.x.copy()
        out[mask] = 0
        return out

    def backward(self, din):
        if isinstance(din, np.ndarray):
            mask = (self.x <= 0)
            dx = din.copy()  # copy so the caller's gradient array is not mutated
            dx[mask] = 0
        else:
            if self.x <= 0:
                dx = 0
            else:
                dx = din
        return dx
    
class SquaredError:
    def __init__(self):
        self.z = None
        self.z_target = None
    
    def forward(self, z, z_target):
        self.z = z
        self.z_target = z_target
        loss = 0.5 * np.asarray(self.z - self.z_target).item() ** 2  # .item() yields a plain float
        return loss

    def backward(self, din):
        dx = (self.z - self.z_target) * din
        return dx

2. Neural Network Model of Linear Two Neurons

In [3]:
class LinearTwoNeurons:
    def __init__(self):
        self.n1 = AffineWithTwoInputs()
        self.relu1 = Relu()
        self.n2 = AffineWithOneInput()
        self.relu2 = Relu()
        self.loss = SquaredError()
        print("Neuron n1 - Initial w: {0}, b: {1}".format(self.n1.w, self.n1.b))
        print("Neuron n2 - Initial w: {0}, b: {1}".format(self.n2.w, self.n2.b))


    def predict(self, x):
        u1 = self.n1.forward(x)
        z1 = self.relu1.forward(u1)
        u2 = self.n2.forward(z1)
        z2 = self.relu2.forward(u2)
        return z2
    
    def backpropagation_gradient(self, x, z_target):
        # forward
        z2 = self.predict(x)
        self.loss.forward(z2, z_target)

        # backward
        din = 1
        din = self.loss.backward(din)
        din = self.relu2.backward(din)
        din = self.n2.backward(din)
        din = self.relu1.backward(din)
        self.n1.backward(din)

    def learning(self, alpha, x, z_target):
        self.backpropagation_gradient(x, z_target)

        self.n1.w = self.n1.w - alpha * self.n1.dw
        self.n1.b = self.n1.b - alpha * self.n1.db
        self.n2.w = self.n2.w - alpha * self.n2.dw
        self.n2.b = self.n2.b - alpha * self.n2.db

3. OR gate with Two Linear Neurons - Learning and Testing

In [4]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 1.0, 1.0, 1.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    ltn = LinearTwoNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            ltn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            loss_sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z2 = ltn.predict(x)
                z_target = d.training_z_target[idx]
                loss_sum += ltn.loss.forward(z2, z_target)

            print("Epoch{0:4d}-Error:{1:7.5f}, Neuron n1[w11: {2:7.5f}, w12: {3:7.5f}, b1: {4:7.5f}], Neuron n2[w2: {5:7.5f}, b2: {6:7.5f}]".format(
                i,
                loss_sum / d.numTrainData,
                ltn.n1.w[0],
                ltn.n1.w[1],
                ltn.n1.b[0],
                ltn.n2.w[0],
                ltn.n2.b[0])
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))
Neuron n1 - Initial w: [ 0.00705989  0.2138305 ], b: [ 0.23089211]
Neuron n2 - Initial w: [ 0.61048821], b: [ 0.14184584]
x: [ 0.  0.], z2: [ 0.28280275], z_target: 0.0, error: 0.03999
x: [ 1.  0.], z2: [ 0.28711274], z_target: 1.0, error: 0.25410
x: [ 0.  1.], z2: [ 0.41334375], z_target: 1.0, error: 0.17208
x: [ 1.  1.], z2: [ 0.41765373], z_target: 1.0, error: 0.16956
Epoch   0-Error:0.14686, Neuron n1[w11: 0.01488, w12: 0.22083, b1: 0.24054], Neuron n2[w2: 0.61674, b2: 0.15761]
Epoch 100-Error:0.04362, Neuron n1[w11: 0.30293, w12: 0.41913, b1: 0.29781], Neuron n2[w2: 0.79176, b2: 0.25576]
Epoch 200-Error:0.03448, Neuron n1[w11: 0.42743, w12: 0.48276, b1: 0.23114], Neuron n2[w2: 0.85885, b2: 0.17540]
Epoch 300-Error:0.03196, Neuron n1[w11: 0.48886, w12: 0.51099, b1: 0.19398], Neuron n2[w2: 0.89649, b2: 0.13357]
Epoch 400-Error:0.03142, Neuron n1[w11: 0.51620, w12: 0.52253, b1: 0.17684], Neuron n2[w2: 0.91330, b2: 0.11519]
Epoch 500-Error:0.03131, Neuron n1[w11: 0.52789, w12: 0.52721, b1: 0.16957], Neuron n2[w2: 0.91978, b2: 0.10784]
Epoch 600-Error:0.03128, Neuron n1[w11: 0.53296, w12: 0.52927, b1: 0.16653], Neuron n2[w2: 0.92182, b2: 0.10513]
Epoch 700-Error:0.03127, Neuron n1[w11: 0.53533, w12: 0.53035, b1: 0.16520], Neuron n2[w2: 0.92207, b2: 0.10428]
Epoch 800-Error:0.03127, Neuron n1[w11: 0.53660, w12: 0.53108, b1: 0.16454], Neuron n2[w2: 0.92161, b2: 0.10416]
Epoch 900-Error:0.03127, Neuron n1[w11: 0.53743, w12: 0.53168, b1: 0.16414], Neuron n2[w2: 0.92087, b2: 0.10433]
Epoch1000-Error:0.03127, Neuron n1[w11: 0.53808, w12: 0.53224, b1: 0.16385], Neuron n2[w2: 0.92003, b2: 0.10461]
x: [ 0.  0.], z2: [ 0.25536016], z_target: 0.0, error: 0.03260
x: [ 1.  0.], z2: [ 0.75041335], z_target: 1.0, error: 0.03115
x: [ 0.  1.], z2: [ 0.74503203], z_target: 1.0, error: 0.03250
x: [ 1.  1.], z2: [ 1.24008522], z_target: 1.0, error: 0.02882

4. AND gate with Two Linear Neurons - Learning and Testing

In [5]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 0.0, 0.0, 1.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    ltn = LinearTwoNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            ltn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            loss_sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z2 = ltn.predict(x)
                z_target = d.training_z_target[idx]
                loss_sum += ltn.loss.forward(z2, z_target)

            print("Epoch{0:4d}-Error:{1:7.5f}, Neuron n1[w11: {2:7.5f}, w12: {3:7.5f}, b1: {4:7.5f}], Neuron n2[w2: {5:7.5f}, b2: {6:7.5f}]".format(
                i,
                loss_sum / d.numTrainData,
                ltn.n1.w[0],
                ltn.n1.w[1],
                ltn.n1.b[0],
                ltn.n2.w[0],
                ltn.n2.b[0])
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))
Neuron n1 - Initial w: [ 0.94079528  0.33696781], b: [ 0.6618524]
Neuron n2 - Initial w: [ 0.52277765], b: [ 0.73916905]
x: [ 0.  0.], z2: [ 1.08517069], z_target: 0.0, error: 0.58880
x: [ 1.  0.], z2: [ 1.57699744], z_target: 0.0, error: 1.24346
x: [ 0.  1.], z2: [ 1.26132993], z_target: 0.0, error: 0.79548
x: [ 1.  1.], z2: [ 1.75315668], z_target: 1.0, error: 0.28362
Epoch   0-Error:0.58495, Neuron n1[w11: 0.92985, w12: 0.32816, b1: 0.63937], Neuron n2[w2: 0.46733, b2: 0.69470]
Epoch 100-Error:0.06764, Neuron n1[w11: 0.91231, w12: 0.33320, b1: 0.50092], Neuron n2[w2: 0.21678, b2: 0.07118]
Epoch 200-Error:0.05013, Neuron n1[w11: 0.94734, w12: 0.42130, b1: 0.44332], Neuron n2[w2: 0.36755, b2: -0.11739]
Epoch 300-Error:0.03895, Neuron n1[w11: 0.96247, w12: 0.52675, b1: 0.38216], Neuron n2[w2: 0.47038, b2: -0.25811]
Epoch 400-Error:0.02826, Neuron n1[w11: 0.96691, w12: 0.63793, b1: 0.31187], Neuron n2[w2: 0.56200, b2: -0.39151]
Epoch 500-Error:0.01863, Neuron n1[w11: 0.96629, w12: 0.74283, b1: 0.23670], Neuron n2[w2: 0.64914, b2: -0.51412]
Epoch 600-Error:0.01110, Neuron n1[w11: 0.96584, w12: 0.83068, b1: 0.16312], Neuron n2[w2: 0.72863, b2: -0.62017]
Epoch 700-Error:0.00603, Neuron n1[w11: 0.96915, w12: 0.89659, b1: 0.09754], Neuron n2[w2: 0.79670, b2: -0.70581]
Epoch 800-Error:0.00304, Neuron n1[w11: 0.97640, w12: 0.94170, b1: 0.04391], Neuron n2[w2: 0.85114, b2: -0.77074]
Epoch 900-Error:0.00145, Neuron n1[w11: 0.98550, w12: 0.97068, b1: 0.00305], Neuron n2[w2: 0.89212, b2: -0.81754]
Epoch1000-Error:0.00066, Neuron n1[w11: 0.99428, w12: 0.98868, b1: -0.02647], Neuron n2[w2: 0.92153, b2: -0.85007]
x: [ 0.  0.], z2: [ 0.], z_target: 0.0, error: 0.00000
x: [ 1.  0.], z2: [ 0.04179845], z_target: 0.0, error: 0.00087
x: [ 0.  1.], z2: [ 0.03664245], z_target: 0.0, error: 0.00067
x: [ 1.  1.], z2: [ 0.95290215], z_target: 1.0, error: 0.00111

5. XOR gate with Two Linear Neurons - Learning and Testing

In [6]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 1.0, 1.0, 0.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    ltn = LinearTwoNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            ltn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            loss_sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z2 = ltn.predict(x)
                z_target = d.training_z_target[idx]
                loss_sum += ltn.loss.forward(z2, z_target)

            print("Epoch{0:4d}-Error:{1:7.5f}, Neuron n1[w11: {2:7.5f}, w12: {3:7.5f}, b1: {4:7.5f}], Neuron n2[w2: {5:7.5f}, b2: {6:7.5f}]".format(
                i,
                loss_sum / d.numTrainData,
                ltn.n1.w[0],
                ltn.n1.w[1],
                ltn.n1.b[0],
                ltn.n2.w[0],
                ltn.n2.b[0])
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z2 = ltn.predict(x)
        z_target = d.training_z_target[idx]
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))
Neuron n1 - Initial w: [ 0.31726888  0.51813443], b: [ 0.10051975]
Neuron n2 - Initial w: [ 0.94270109], b: [ 0.28726142]
x: [ 0.  0.], z2: [ 0.3820215], z_target: 0.0, error: 0.07297
x: [ 1.  0.], z2: [ 0.68111121], z_target: 1.0, error: 0.05085
x: [ 0.  1.], z2: [ 0.87046738], z_target: 1.0, error: 0.00839
x: [ 1.  1.], z2: [ 1.1695571], z_target: 0.0, error: 0.68393
Epoch   0-Error:0.19279, Neuron n1[w11: 0.30923, w12: 0.50825, b1: 0.09011], Neuron n2[w2: 0.93340, b2: 0.27624]
Epoch 100-Error:0.13164, Neuron n1[w11: 0.15542, w12: 0.25162, b1: 0.07308], Neuron n2[w2: 0.76535, b2: 0.26647]
Epoch 200-Error:0.12723, Neuron n1[w11: 0.10004, w12: 0.15079, b1: 0.10976], Neuron n2[w2: 0.72507, b2: 0.31732]
Epoch 300-Error:0.12581, Neuron n1[w11: 0.06490, w12: 0.09111, b1: 0.12918], Neuron n2[w2: 0.70667, b2: 0.34586]
Epoch 400-Error:0.12530, Neuron n1[w11: 0.04195, w12: 0.05395, b1: 0.14060], Neuron n2[w2: 0.69606, b2: 0.36343]
Epoch 500-Error:0.12510, Neuron n1[w11: 0.02679, w12: 0.03030, b1: 0.14764], Neuron n2[w2: 0.68829, b2: 0.37483]
Epoch 600-Error:0.12503, Neuron n1[w11: 0.01670, w12: 0.01506, b1: 0.15208], Neuron n2[w2: 0.68152, b2: 0.38252]
Epoch 700-Error:0.12501, Neuron n1[w11: 0.00996, w12: 0.00514, b1: 0.15491], Neuron n2[w2: 0.67508, b2: 0.38788]
Epoch 800-Error:0.12500, Neuron n1[w11: 0.00544, w12: -0.00135, b1: 0.15672], Neuron n2[w2: 0.66871, b2: 0.39176]
Epoch 900-Error:0.12500, Neuron n1[w11: 0.00239, w12: -0.00564, b1: 0.15788], Neuron n2[w2: 0.66235, b2: 0.39469]
Epoch1000-Error:0.12501, Neuron n1[w11: 0.00033, w12: -0.00850, b1: 0.15861], Neuron n2[w2: 0.65600, b2: 0.39699]
x: [ 0.  0.], z2: [ 0.50103333], z_target: 0.0, error: 0.12552
x: [ 1.  0.], z2: [ 0.50124686], z_target: 1.0, error: 0.12438
x: [ 0.  1.], z2: [ 0.49545664], z_target: 1.0, error: 0.12728
x: [ 1.  1.], z2: [ 0.49567017], z_target: 0.0, error: 0.12284