GATE Neural Network with Three Neurons - Backpropagation

1. Layers with Forward and Backward

In [1]:
import numpy as np
import random
import math
In [2]:
class AffineWithTwoInputs:
    def __init__(self):
        self.w = np.array([random.random(), random.random()])   # weight of one input
        self.b = np.array([random.random()])  # bias
        self.x = None
        self.dw = None
        self.db = None
        
    def forward(self, x):
        self.x = x
        out = np.dot(self.w, self.x) + self.b
        return out

    def backward(self, din):
        if isinstance(din, np.ndarray) and din.size == 1:
            din = np.asscalar(din)
        dx = np.dot(din, self.w.T)
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return dx

class AffineWithOneInput:
    def __init__(self):
        self.w = np.array([random.random()])   # weight of one input
        self.b = np.array([random.random()])   # bias
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(self.w, self.x) + self.b
        return out

    def backward(self, din):
        dx = np.dot(din, self.w.T)
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return dx
    
class Relu:
    def __init__(self):
        self.x = None

    def forward(self, x):
        self.x = x
        mask = (self.x <= 0)
        out = self.x.copy()
        out[mask] = 0
        return out

    def backward(self, din):
        if isinstance(din, np.ndarray):
            mask = (self.x <= 0)
            din[mask] = 0
            dx = din
        else:
            if self.x <= 0:
                dx = 0
            else:
                dx = din
        return dx
    
class SquaredError:
    def __init__(self):
        self.z = None
        self.z_target = None
    
    def forward(self, z, z_target):
        self.z = z
        self.z_target = z_target
        loss = 1.0 / 2.0 * math.pow(self.z - self.z_target, 2)
        return loss

    def backward(self, din):
        dx = (self.z - self.z_target) * din
        return dx

2. Neural Network Model of Three Neurons

In [3]:
class ThreeNeurons:
    def __init__(self):
        self.n1 = AffineWithTwoInputs()
        self.relu1 = Relu()
        self.n2 = AffineWithTwoInputs()
        self.relu2 = Relu()
        self.n3 = AffineWithTwoInputs()
        self.relu3 = Relu()
        self.loss = SquaredError()

    def predict(self, x):
        u1 = self.n1.forward(x)
        z1 = self.relu1.forward(u1)
        u2 = self.n2.forward(x)
        z2 = self.relu2.forward(u2)
        z  = np.array([np.asscalar(z1), np.asscalar(z2)])
        u3 = self.n3.forward(z)
        z3 = self.relu3.forward(u3)
        return z3
    
    def backpropagation_gradient(self, x, z_target):
        # forward
        z3 = self.predict(x)
        self.loss.forward(z3, z_target)

        # backward
        din = 1
        din = self.loss.backward(din)
        
        din = self.relu3.backward(din)
        din = self.n3.backward(din)
        
        din_0 = self.relu1.backward(din[0])
        self.n1.backward(din_0)
        
        din_1 = self.relu2.backward(din[1])
        self.n2.backward(din_1)

    def learning(self, alpha, x, z_target):
        self.backpropagation_gradient(x, z_target)

        self.n1.w = self.n1.w - alpha * self.n1.dw
        self.n1.b = self.n1.b - alpha * self.n1.db
        self.n2.w = self.n2.w - alpha * self.n2.dw
        self.n2.b = self.n2.b - alpha * self.n2.db
        self.n3.w = self.n3.w - alpha * self.n3.dw
        self.n3.b = self.n3.b - alpha * self.n3.db

3. OR gate with Three Neurons - Learing and Testing

In [4]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 1.0, 1.0, 1.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    tn = ThreeNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            tn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z3 = tn.predict(x)
                z_target = d.training_z_target[idx]
                sum = sum + tn.loss.forward(z3, z_target)

            print("{0:4d}-Err:{1:7.4f}, n1[w:{2:s},b:{3:s}], n2[w:{4:s},b:{5:s}], n3[w:{6:},b:{7:s}]".format(
                i, 
                sum / d.numTrainData,
                np.array_str(tn.n1.w, precision=2),
                np.array_str(tn.n1.b, precision=2),
                np.array_str(tn.n2.w, precision=2),
                np.array_str(tn.n2.b, precision=2),
                np.array_str(tn.n3.w, precision=2),
                np.array_str(tn.n3.b, precision=2))
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))
x: [ 0.  0.], z3: [ 0.95990367], z_target: 0.0, error: 0.46071
x: [ 1.  0.], z3: [ 1.49657727], z_target: 1.0, error: 0.12329
x: [ 0.  1.], z3: [ 0.98898378], z_target: 1.0, error: 0.00006
x: [ 1.  1.], z3: [ 1.52565738], z_target: 1.0, error: 0.13816
   0-Err: 0.1571, n1[w:[ 0.67  0.01],b:[ 0.47]], n2[w:[ 0.62  0.13],b:[ 0.26]], n3[w:[ 0.66  0.12],b:[ 0.58]]
 100-Err: 0.0510, n1[w:[ 0.65  0.18],b:[ 0.32]], n2[w:[ 0.62  0.15],b:[ 0.24]], n3[w:[ 0.56  0.08],b:[ 0.31]]
 200-Err: 0.0405, n1[w:[ 0.66  0.35],b:[ 0.26]], n2[w:[ 0.62  0.17],b:[ 0.23]], n3[w:[ 0.62  0.11],b:[ 0.23]]
 300-Err: 0.0350, n1[w:[ 0.64  0.46],b:[ 0.23]], n2[w:[ 0.62  0.2 ],b:[ 0.23]], n3[w:[ 0.66  0.12],b:[ 0.17]]
 400-Err: 0.0326, n1[w:[ 0.63  0.54],b:[ 0.2]], n2[w:[ 0.61  0.21],b:[ 0.22]], n3[w:[ 0.69  0.12],b:[ 0.14]]
 500-Err: 0.0317, n1[w:[ 0.61  0.58],b:[ 0.19]], n2[w:[ 0.61  0.22],b:[ 0.22]], n3[w:[ 0.71  0.12],b:[ 0.12]]
 600-Err: 0.0314, n1[w:[ 0.6   0.61],b:[ 0.18]], n2[w:[ 0.61  0.22],b:[ 0.22]], n3[w:[ 0.72  0.11],b:[ 0.11]]
 700-Err: 0.0313, n1[w:[ 0.6   0.62],b:[ 0.18]], n2[w:[ 0.61  0.22],b:[ 0.22]], n3[w:[ 0.72  0.11],b:[ 0.11]]
 800-Err: 0.0313, n1[w:[ 0.59  0.63],b:[ 0.18]], n2[w:[ 0.61  0.22],b:[ 0.22]], n3[w:[ 0.73  0.11],b:[ 0.1]]
 900-Err: 0.0313, n1[w:[ 0.59  0.63],b:[ 0.18]], n2[w:[ 0.6   0.22],b:[ 0.22]], n3[w:[ 0.73  0.11],b:[ 0.1]]
1000-Err: 0.0313, n1[w:[ 0.59  0.63],b:[ 0.18]], n2[w:[ 0.6   0.22],b:[ 0.22]], n3[w:[ 0.73  0.11],b:[ 0.1]]
x: [ 0.  0.], z3: [ 0.25585892], z_target: 0.0, error: 0.03273
x: [ 1.  0.], z3: [ 0.7519981], z_target: 1.0, error: 0.03075
x: [ 0.  1.], z3: [ 0.74227074], z_target: 1.0, error: 0.03321
x: [ 1.  1.], z3: [ 1.23840992], z_target: 1.0, error: 0.02842

4. AND gate with Three Neurons - Learing and Testing

In [5]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 0.0, 0.0, 1.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    tn = ThreeNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            tn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z3 = tn.predict(x)
                z_target = d.training_z_target[idx]
                sum = sum + tn.loss.forward(z3, z_target)

            print("{0:4d}-Err:{1:7.4f}, n1[w:{2:s},b:{3:s}], n2[w:{4:s},b:{5:s}], n3[w:{6:},b:{7:s}]".format(
                i, 
                sum / d.numTrainData,
                np.array_str(tn.n1.w, precision=2),
                np.array_str(tn.n1.b, precision=2),
                np.array_str(tn.n2.w, precision=2),
                np.array_str(tn.n2.b, precision=2),
                np.array_str(tn.n3.w, precision=2),
                np.array_str(tn.n3.b, precision=2))
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))
x: [ 0.  0.], z3: [ 0.78815819], z_target: 0.0, error: 0.31060
x: [ 1.  0.], z3: [ 1.49597372], z_target: 0.0, error: 1.11897
x: [ 0.  1.], z3: [ 1.46896657], z_target: 0.0, error: 1.07893
x: [ 1.  1.], z3: [ 2.17678209], z_target: 1.0, error: 0.69241
   0-Err: 0.6131, n1[w:[ 0.54  0.69],b:[ 0.14]], n2[w:[ 0.74  0.57],b:[ 0.25]], n3[w:[ 0.44  0.55],b:[ 0.5]]
 100-Err: 0.0349, n1[w:[ 0.5   0.66],b:[-0.03]], n2[w:[ 0.68  0.52],b:[ 0.04]], n3[w:[ 0.29  0.34],b:[-0.1]]
 200-Err: 0.0207, n1[w:[ 0.52  0.69],b:[-0.08]], n2[w:[ 0.71  0.55],b:[-0.02]], n3[w:[ 0.4   0.44],b:[-0.26]]
 300-Err: 0.0110, n1[w:[ 0.55  0.71],b:[-0.14]], n2[w:[ 0.74  0.58],b:[-0.08]], n3[w:[ 0.48  0.52],b:[-0.38]]
 400-Err: 0.0052, n1[w:[ 0.57  0.74],b:[-0.19]], n2[w:[ 0.76  0.61],b:[-0.13]], n3[w:[ 0.55  0.58],b:[-0.47]]
 500-Err: 0.0022, n1[w:[ 0.58  0.75],b:[-0.22]], n2[w:[ 0.78  0.62],b:[-0.17]], n3[w:[ 0.6   0.63],b:[-0.53]]
 600-Err: 0.0009, n1[w:[ 0.59  0.76],b:[-0.25]], n2[w:[ 0.79  0.63],b:[-0.2]], n3[w:[ 0.63  0.66],b:[-0.57]]
 700-Err: 0.0003, n1[w:[ 0.6   0.77],b:[-0.26]], n2[w:[ 0.8   0.64],b:[-0.21]], n3[w:[ 0.65  0.68],b:[-0.59]]
 800-Err: 0.0001, n1[w:[ 0.61  0.77],b:[-0.27]], n2[w:[ 0.8   0.64],b:[-0.22]], n3[w:[ 0.67  0.7 ],b:[-0.61]]
 900-Err: 0.0000, n1[w:[ 0.61  0.77],b:[-0.28]], n2[w:[ 0.81  0.65],b:[-0.23]], n3[w:[ 0.67  0.7 ],b:[-0.62]]
1000-Err: 0.0000, n1[w:[ 0.61  0.78],b:[-0.28]], n2[w:[ 0.81  0.65],b:[-0.23]], n3[w:[ 0.68  0.71],b:[-0.62]]
x: [ 0.  0.], z3: [ 0.], z_target: 0.0, error: 0.00000
x: [ 1.  0.], z3: [ 0.00590986], z_target: 0.0, error: 0.00002
x: [ 0.  1.], z3: [ 0.00628509], z_target: 0.0, error: 0.00002
x: [ 1.  1.], z3: [ 0.99262675], z_target: 1.0, error: 0.00003

5. XOR gate with Three Neurons - Learing and Testing

In [12]:
class Data:
    def __init__(self):
        self.training_input_value = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)])
        self.training_z_target = np.array([0.0, 1.0, 1.0, 0.0])
        self.numTrainData = len(self.training_input_value)

if __name__ == '__main__':
    tn = ThreeNeurons()
    d = Data()
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

    max_epoch = 2000
    print_epoch_period = 100
    for i in range(max_epoch + 1):
        for idx in range(d.numTrainData):
            x = d.training_input_value[idx]
            z_target = d.training_z_target[idx]
            tn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            sum = 0.0
            for idx in range(d.numTrainData):
                x = d.training_input_value[idx]
                z3 = tn.predict(x)
                z_target = d.training_z_target[idx]
                sum = sum + tn.loss.forward(z3, z_target)

            print("{0:4d}-Err:{1:7.4f}, n1[w:{2:s},b:{3:s}], n2[w:{4:s},b:{5:s}], n3[w:{6:},b:{7:s}]".format(
                i, 
                sum / d.numTrainData,
                np.array_str(tn.n1.w, precision=2),
                np.array_str(tn.n1.b, precision=2),
                np.array_str(tn.n2.w, precision=2),
                np.array_str(tn.n2.b, precision=2),
                np.array_str(tn.n3.w, precision=2),
                np.array_str(tn.n3.b, precision=2))
            )
            
    for idx in range(d.numTrainData):
        x = d.training_input_value[idx]
        z3 = tn.predict(x)
        z_target = d.training_z_target[idx]
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))
x: [ 0.  0.], z3: [ 0.94316759], z_target: 0.0, error: 0.44478
x: [ 1.  0.], z3: [ 1.66962427], z_target: 1.0, error: 0.22420
x: [ 0.  1.], z3: [ 1.80544379], z_target: 1.0, error: 0.32437
x: [ 1.  1.], z3: [ 2.53190047], z_target: 0.0, error: 3.20526
   0-Err: 0.8269, n1[w:[ 0.92  0.65],b:[-0.01]], n2[w:[ 0.04  0.73],b:[ 0.15]], n3[w:[ 0.69  0.45],b:[ 0.79]]
 100-Err: 0.1259, n1[w:[ 0.75  0.46],b:[-0.11]], n2[w:[-0.17  0.6 ],b:[-0.03]], n3[w:[ 0.12  0.19],b:[ 0.37]]
 200-Err: 0.1175, n1[w:[ 0.75  0.45],b:[-0.08]], n2[w:[-0.29  0.57],b:[-0.06]], n3[w:[ 0.03  0.26],b:[ 0.42]]
 300-Err: 0.1057, n1[w:[ 0.75  0.45],b:[-0.07]], n2[w:[-0.45  0.54],b:[-0.09]], n3[w:[ -3.18e-04   3.96e-01],b:[ 0.45]]
 400-Err: 0.0958, n1[w:[ 0.74  0.45],b:[-0.08]], n2[w:[-0.55  0.59],b:[-0.04]], n3[w:[-0.01  0.55],b:[ 0.43]]
 500-Err: 0.0886, n1[w:[ 0.74  0.45],b:[-0.08]], n2[w:[-0.64  0.64],b:[ 0.]], n3[w:[-0.    0.68],b:[ 0.4]]
 600-Err: 0.0852, n1[w:[ 0.74  0.45],b:[-0.08]], n2[w:[-0.69  0.68],b:[ 0.]], n3[w:[ 0.01  0.77],b:[ 0.37]]
 700-Err: 0.0836, n1[w:[ 0.74  0.44],b:[-0.07]], n2[w:[-0.72  0.71],b:[-0.]], n3[w:[ 0.03  0.82],b:[ 0.34]]
 800-Err: 0.0827, n1[w:[ 0.75  0.43],b:[-0.06]], n2[w:[-0.74  0.73],b:[ 0.]], n3[w:[ 0.06  0.85],b:[ 0.32]]
 900-Err: 0.0818, n1[w:[ 0.77  0.4 ],b:[-0.05]], n2[w:[-0.75  0.74],b:[-0.]], n3[w:[ 0.08  0.87],b:[ 0.29]]
1000-Err: 0.0806, n1[w:[ 0.79  0.36],b:[-0.02]], n2[w:[-0.76  0.75],b:[-0.]], n3[w:[ 0.12  0.89],b:[ 0.27]]
1100-Err: 0.0788, n1[w:[ 0.82  0.3 ],b:[ -3.31e-05]], n2[w:[-0.76  0.76],b:[ 0.]], n3[w:[ 0.15  0.9 ],b:[ 0.25]]
1200-Err: 0.0764, n1[w:[ 0.85  0.23],b:[ 0.]], n2[w:[-0.77  0.77],b:[-0.]], n3[w:[ 0.2   0.92],b:[ 0.22]]
1300-Err: 0.0728, n1[w:[ 0.88  0.13],b:[ 0.]], n2[w:[-0.79  0.78],b:[-0.]], n3[w:[ 0.25  0.94],b:[ 0.19]]
1400-Err: 0.0672, n1[w:[ 0.91  0.01],b:[ 0.]], n2[w:[-0.81  0.8 ],b:[ 0.]], n3[w:[ 0.32  0.97],b:[ 0.16]]
1500-Err: 0.0574, n1[w:[ 0.93 -0.15],b:[  7.11e-06]], n2[w:[-0.83  0.82],b:[-0.]], n3[w:[ 0.41  1.01],b:[ 0.13]]
1600-Err: 0.0420, n1[w:[ 0.94 -0.36],b:[-0.]], n2[w:[-0.85  0.85],b:[ 0.]], n3[w:[ 0.54  1.04],b:[ 0.09]]
1700-Err: 0.0226, n1[w:[ 0.94 -0.58],b:[-0.]], n2[w:[-0.87  0.86],b:[-0.]], n3[w:[ 0.7   1.07],b:[ 0.05]]
1800-Err: 0.0077, n1[w:[ 0.93 -0.76],b:[-0.01]], n2[w:[-0.88  0.88],b:[  9.22e-05]], n3[w:[ 0.86  1.09],b:[ 0.03]]
1900-Err: 0.0017, n1[w:[ 0.94 -0.88],b:[-0.]], n2[w:[-0.88  0.88],b:[ -2.80e-05]], n3[w:[ 0.96  1.1 ],b:[ 0.02]]
2000-Err: 0.0003, n1[w:[ 0.94 -0.93],b:[ -3.59e-05]], n2[w:[-0.89  0.89],b:[ -5.41e-05]], n3[w:[ 1.01  1.1 ],b:[ 0.02]]
x: [ 0.  0.], z3: [ 0.01523926], z_target: 0.0, error: 0.00012
x: [ 1.  0.], z3: [ 0.96496405], z_target: 1.0, error: 0.00061
x: [ 0.  1.], z3: [ 0.99449935], z_target: 1.0, error: 0.00002
x: [ 1.  1.], z3: [ 0.02917622], z_target: 0.0, error: 0.00043