#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In order to generate a random curve, we first draw $n = 300$ real numbers uniformly at random on $[0, 1]$ and call them $x_1, \dots, x_n$. We also draw $n$ real numbers uniformly at random on $[-\frac{1}{10}, \frac{1}{10}]$, call them $v_1, \dots, v_n$, representing the noise. Next, we generate the sample points $d_i$ on the random curve using
# \begin{equation}
# d_i = \sin(20 x_i) + 3 x_i + v_i
# ,\,\,\,\,\text{for}\,\,\,\,i = 1, \dots, n.
# \end{equation}

# In[3]:


p = 300                                  # total number of samples (n in the text)
x = np.random.uniform(0, 1, p)           # x ~ U[0, 1]
v = np.random.uniform(-0.1, 0.1, p)      # v ~ U[-0.1, 0.1]
d = np.sin(20*x) + 3*x + v

plt.figure(figsize=(9, 9))
plt.scatter(x, d)
plt.xlabel('$x_i$', fontsize=20)
plt.ylabel('$d_i$', fontsize=20)
plt.show()


# We will consider a neural network with a single hidden layer: one input, $N = 24$ hidden neurons, and one output neuron (the network structure is 1xNx1).
# The network thus has $3N + 1$ weights including biases ($2N$ for the hidden layer and $N + 1$ for the output neuron). Let $\mathbf{w}$ denote the vector of all these $3N + 1$ weights. The output neuron uses the linear activation function $\varphi_{\text{output}}(v) = v$; all other neurons use the activation function $\varphi(v) = \tanh(v)$. Given input $x$, we write $f(x, \mathbf{w})$ for the network output.
# Our goal is to use the backpropagation algorithm to train the network, i.e. to find the weights that minimize the Mean Squared Error (MSE). That is, we seek to solve the following optimization problem using backpropagation:
#
# \begin{equation}
# \arg\min_{\mathbf{w}} \frac{1}{n}\sum_{i=1}^{n}\left(d_i - f(x_i, \mathbf{w})\right)^2
# \end{equation}
#
#
# ##### Neural Network Structure

# In[4]:


class NeuralNetworks(object):

    def __init__(self):
        # Initialize the weight vectors
        self.n = 24                                       # number of hidden neurons
        self.eta = 0.01                                   # gradient-step learning rate
        self.w_1 = np.random.normal(0, 1, (self.n, 1))    # input --> hidden initial weight vector
        self.b_1 = np.ones((self.n, 1))                   # hidden-layer biases
        self.w_2 = np.random.uniform(0, 1, (self.n, 1))   # hidden --> output initial weight vector
        self.b_2 = np.ones((1, 1))                        # output bias

    def FeedForward(self, x):
        # Feed the input x forward and return the predicted output.
        # The notation follows Haykin's book.
        self.v_1 = x*self.w_1 + self.b_1                  # local induced fields of the hidden layer
        self.y_1 = np.tanh(self.v_1)                      # hidden-layer outputs
        self.v_2 = self.y_1.T.dot(self.w_2) + self.b_2    # local induced field of the output neuron
        self.o = self.v_2                                 # network output (linear activation)
        return self.o

    def loss(self, x, d):
        # Compute the cost function of the network for a *vector* of inputs/outputs.
        # x : input vector
        # d : desired output
        temp = np.zeros(len(x))
        for i in range(len(x)):
            temp[i] = d[i] - self.FeedForward(x[i])
        self.cost = np.mean(np.square(temp))
        return self.cost

    def BackPropagate(self, x, y, d):
        # Given the input, the predicted output y, and the desired output d,
        # update the weights accordingly.
        # The notation follows Haykin, Eq. (4.13).
        self.delta_out = (d - y)*1                        # 1: phi' of the output neuron at its local induced field
        # Hidden-layer delta, computed with w_2 *before* it is updated
        self.delta_1 = (1 - np.power(np.tanh(self.v_1), 2))*self.w_2*self.delta_out
        self.w_2 += self.eta*self.delta_out*self.y_1
        self.b_2 += self.eta*self.delta_out
        self.w_1 += self.eta*x*self.delta_1
        self.b_1 += self.eta*self.delta_1

    def train(self, x, d, epoch=100):
        # Given a vector of inputs and desired outputs, train the network.
        iter = 0
        while iter != epoch:
            for i in range(len(x)):
                o = self.FeedForward(x[i])                # feed the current sample forward
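                # The weights are updated immediately after each sample is presented,
                # i.e. this is online (stochastic) gradient descent rather than a batch update.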
                self.BackPropagate(x[i], o, d[i])         # backpropagate the error and update the weights
            if iter % (epoch//5) == 0:
                print("Epoch: %d\nLoss: %f" % (iter, self.loss(x, d)))
            iter += 1


# In[7]:


shahin = NeuralNetworks()
print("Initial Loss: %f" % shahin.loss(x, d))
print("----|Training|----")
shahin.train(x, d, 5000)
print("----Training Completed----")


# In[8]:


yy = np.zeros(len(x))
xx = np.linspace(0, 1, len(x))
for i in range(len(xx)):
    yy[i] = shahin.FeedForward(xx[i])

plt.figure(figsize=(9, 9))
plt.scatter(x, d, label='Data-set')
plt.plot(xx, yy, color='red', label='NN-Output')
plt.xlabel('$x_i$', fontsize=20)
plt.ylabel('$d_i$', fontsize=20)
plt.legend()
plt.show()


# As can be seen from the figure above, the network fits the given data set accurately. We used a gradient-descent learning rate of $\eta = 0.01$. After *5000* epochs the MSE of the network is approximately $0.05$.

# In[ ]:
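

# As a quick sanity check (a minimal sketch, assuming the trained `shahin` network and the arrays `x`, `d`, and `xx` from the cells above are still in memory), we can compare the network output on the grid `xx` with the noise-free target curve $\sin(20x) + 3x$. For reference, the noise is uniform on $[-0.1, 0.1]$, so its variance $(0.2)^2/12 \approx 0.0033$ is roughly the MSE one would expect against the noisy data if the network recovered the true curve exactly.

# In[ ]:


# Sanity check: compare the trained network with the noise-free target curve.
# `target` and `pred` are helper arrays introduced only for this check.
target = np.sin(20*xx) + 3*xx                                   # noise-free curve on the grid xx
pred = np.array([shahin.FeedForward(xi).item() for xi in xx])   # network output on the same grid

print("MSE on the noisy training data  : %f" % shahin.loss(x, d))
print("MSE against the noise-free curve: %f" % np.mean((target - pred)**2))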