# Implement a simple neural network

author: weiya <[email protected]>

date: Dec. 27, 2017

In [2]:
%matplotlib inline

In [3]:
import numpy as np
import sklearn.datasets
import sklearn.linear_model
import matplotlib.pyplot as plt
import matplotlib

In [4]:
# generate a dataset
# two interleaving half-circles ("moons"), 200 samples; y holds the 0/1 class
# labels. The seed is fixed so the dataset (and everything trained on it
# below) is reproducible.
np.random.seed(123)
X, y = sklearn.datasets.make_moons(200, noise=0.20)
# visualize the two classes, colored by label
plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap = plt.cm.Spectral)

Out[4]:
<matplotlib.collections.PathCollection at 0x7faecd5e27d0>

Predictions $$\begin{array}{ll} z_m & = \sigma(\alpha_{0m}+\alpha_m^TX)\qquad m=1,\ldots, M\\ T_k &= \beta_{0k}+\beta_k^TZ\qquad k=1,\ldots, K\\ f_k(X)&=g_k(T)=\frac{\exp(T_k)}{\sum_{\ell=1}^K\exp(T_\ell)} \end{array}$$

Loss function $$L = -\frac 1N \sum\limits_{i=1}^N L_i$$ where $$\begin{array}{ll} L_i& = \sum\limits_{k=1}^K y_{ik}\log f_k(x_i) \\ &=\sum\limits_{k=1}^K y_{ik}\Big(T_k-\log\big(\sum_{\ell=1}^K \exp(T_\ell)\big)\Big)\\ &=\sum\limits_{k=1}^K y_{ik}T_k-\log\big(\sum_{\ell=1}^K \exp(T_\ell)\big)\\ \end{array}$$ where the last step uses the fact that $\sum_{k=1}^K y_{ik}=1$ (one-hot labels).

BP formula $$\begin{array}{lll} \frac{\partial L_i}{\partial \beta_{km}} &=& \frac{\partial L_i}{\partial T_k}\frac{\partial T_k}{\partial \beta_{km}}\\ &=&(y_{ik}-f_k(x_i))z_{mi}\\ &=&\delta_{ki}z_{mi} \end{array}$$ Thus, we have $\frac{\partial L_i}{\partial \beta_{k0}}=\delta_{ki}, \frac{\partial L_i}{\partial \beta_{km}}=\delta_{ki}z_{mi}$ $$\begin{array}{lll} \frac{\partial L_i}{\partial \alpha_{m\ell}} &=&\sum\limits_{k=1}^K \frac{\partial L_i}{\partial T_k}\frac{\partial T_k}{\partial z_{m}}\frac{\partial z_m}{\partial \alpha_{m\ell}}\\ &=&\sigma'(\alpha_{0m}+\alpha_m^TX)\sum\limits_{k=1}^K\delta_{ki}\beta_{km}x_{i\ell}\\ &=&s_{mi}x_{i\ell} \end{array}$$ Thus, we have $\frac{\partial L_i}{\partial \alpha_{m0}}=s_{mi}, \frac{\partial L_i}{\partial \alpha_{m\ell}}=s_{mi}x_{i\ell}$

Here, $\sigma(v)=\tanh(v)$, whose derivative is $\tanh'(v)=1-\tanh^2(v)$. Also, denote $\boldsymbol\beta=(\beta_{km})_{K\times M}, \boldsymbol\alpha=(\alpha_{m\ell})_{M\times L}$

In [5]:
num_examples = len(X)  # training set size N (X from the cell above)
nn_input_dim = 2       # input layer dimensionality (the two moon coordinates)
nn_output_dim = 2      # output layer dimensionality (two classes)
# learning rate for gradient descent
gamma = 0.01
# regularization strength
reg_lambda = 0.01

In [6]:
# Helper function to evaluate the total loss on the dataset
# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
    """Mean cross-entropy loss of the network on the global (X, y),
    plus an L2 penalty on the weight matrices.

    Parameters
    ----------
    model : dict with keys 'alpha' (L*M), 'a' (1*M), 'beta' (M*K), 'b' (1*K)

    Returns
    -------
    float : regularized average loss over num_examples samples.
    """
    alpha, a, beta, b = model['alpha'], model['a'], model['beta'], model['b']
    # forward pass to calculate the predictions
    z = np.tanh(X.dot(alpha) + a)  # N*L dot L*M -> N*M
    logits = z.dot(beta) + b       # N*M dot M*K -> N*K
    # stable softmax: subtracting the per-row max before exp avoids overflow
    # and does not change the resulting probabilities
    logits -= np.max(logits, axis=1, keepdims=True)
    T = np.exp(logits)
    probs = T / np.sum(T, axis=1, keepdims=True)  # row-normalize -> N*K
    # cross-entropy: minus log-probability assigned to each true class
    logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(logprobs)
    # add regularization term (biases a, b are conventionally not penalized)
    data_loss += reg_lambda/2 * (np.sum(np.square(alpha)) + np.sum(np.square(beta)))
    return 1./num_examples * data_loss

In [13]:
# Helper function to predict an output (0 or 1)
# Helper function to predict an output (0 or 1)
def predict(model, x):
    """Predict the class label for each row of x.

    Parameters
    ----------
    model : dict with keys 'alpha' (L*M), 'a' (1*M), 'beta' (M*K), 'b' (1*K)
    x : array of shape (N, L)

    Returns
    -------
    ndarray of shape (N,) : argmax class index per sample.
    """
    alpha, a, beta, b = model['alpha'], model['a'], model['beta'], model['b']
    # forward pass
    z = np.tanh(x.dot(alpha) + a)  # N*L dot L*M -> N*M
    logits = z.dot(beta) + b       # N*M dot M*K -> N*K
    # stable softmax: subtracting the per-row max before exp avoids overflow
    # (which produced inf/nan probabilities) without changing the argmax
    logits -= np.max(logits, axis=1, keepdims=True)
    T = np.exp(logits)
    probs = T / np.sum(T, axis=1, keepdims=True)  # row-normalize -> N*K
    return np.argmax(probs, axis=1)

In [14]:
# Helper function to plot a decision boundary
# Helper function to plot a decision boundary
def plot_decision_boundary(pred_func):
    """Shade the plane by predicted class and overlay the training points.

    pred_func : callable mapping an (n, 2) array of points to n class labels.
    Draws on the current matplotlib axes; relies on the globals X and y.
    """
    step = 0.01
    # pad the data range by half a unit on every side
    x0_lo, x0_hi = X[:, 0].min() - .5, X[:, 0].max() + .5
    x1_lo, x1_hi = X[:, 1].min() - .5, X[:, 1].max() + .5
    # dense grid covering the padded bounding box
    grid_x, grid_y = np.meshgrid(np.arange(x0_lo, x0_hi, step),
                                 np.arange(x1_lo, x1_hi, step))
    # classify every grid point, then reshape back onto the grid
    labels = pred_func(np.c_[grid_x.ravel(), grid_y.ravel()])
    labels = labels.reshape(grid_x.shape)
    plt.contourf(grid_x, grid_y, labels, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap = plt.cm.Spectral)

In [16]:
# model
def build_model(nn_hdim, num_passes = 2000, print_loss = False):
# initialize
np.random.seed(123)
alpha = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
a = np.zeros((1, nn_hdim))
beta = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
b = np.zeros((1, nn_output_dim))

model = {}

for i in xrange(0, num_passes):

# forward pass
z = np.tanh(X.dot(alpha) + a) # N*L * L*M
T = np.exp(z.dot(beta) + b) # N*M * M*K
probs = T / np.sum(T, axis = 1, keepdims=True)

# back propagation
delta = probs
delta[range(num_examples), y] -= 1 # pay attention! because y_ik = 1 for the corresponding class
dbeta = (z.T).dot(delta)
db = np.sum(delta, axis=0, keepdims = True)
s = delta.dot(beta.T) * (1-np.power(z, 2))
dalpha = np.dot(X.T, s)
da = np.sum(s, axis=0)

dbeta += reg_lambda * beta
dalpha += reg_lambda * alpha

alpha += -gamma * dalpha
a += -gamma * da
beta += -gamma * dbeta
b += -gamma *db

# assign new parameters to the model
model = {'alpha': alpha, 'a': a, 'beta': beta, 'b':b}

if print_loss and i %100 ==0:
print "Loss after iteration %i: %f" %(i, calculate_loss(model))
return model

In [17]:
# build model
model = build_model(3, print_loss=True)

Loss after iteration 0: 0.604434
Loss after iteration 100: 0.302863
Loss after iteration 200: 0.125740
Loss after iteration 300: 0.083622
Loss after iteration 400: 0.078040
Loss after iteration 500: 0.075535
Loss after iteration 600: 0.073808
Loss after iteration 700: 0.072311
Loss after iteration 800: 0.070842
Loss after iteration 900: 0.069394
Loss after iteration 1000: 0.068051
Loss after iteration 1100: 0.066872
Loss after iteration 1200: 0.065865
Loss after iteration 1300: 0.065013
Loss after iteration 1400: 0.064316
Loss after iteration 1500: 0.066696
Loss after iteration 1600: 0.065865
Loss after iteration 1700: 0.065327
Loss after iteration 1800: 0.064931
Loss after iteration 1900: 0.064617

In [18]:
# plot
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")

/home/weiya/anaconda2/lib/python2.7/site-packages/numpy/ma/core.py:6385: MaskedArrayFutureWarning: In the future the default for ma.minimum.reduce will be axis=0, not the current None, to match np.minimum.reduce. Explicitly pass 0 or None to silence this warning.
return self.reduce(a)
/home/weiya/anaconda2/lib/python2.7/site-packages/numpy/ma/core.py:6385: MaskedArrayFutureWarning: In the future the default for ma.maximum.reduce will be axis=0, not the current None, to match np.maximum.reduce. Explicitly pass 0 or None to silence this warning.
return self.reduce(a)

Out[18]:
<matplotlib.text.Text at 0x7faecd4b9d90>