#!/usr/bin/env python
# coding: utf-8

# # Simple Neural Network
# A simple 3-layer classifier in Python, with no bias terms or regularization.
# Created on a [vnode](https://github.com/thomaswilley/vnode) :)
#
# @thomaswilley

# In[1]:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
get_ipython().run_line_magic('matplotlib', 'inline')

src_X, src_y = load_digits(return_X_y=True)

m, dim_input = src_X.shape
dim_hidden = 100
dim_output = 10
m_train = 1000

# load_digits() pixels range over 0..16, so scale by 16 (not 255) to land in [0, 1]
X_train = src_X[0:m_train, :] / 16.
X_test = src_X[m_train:, :] / 16.  # m_train:, not m_train+1:, or we'd silently drop a sample

# one-hot encode the labels
y = np.zeros((m, 10))
y[np.arange(m), src_y] = 1
y_train = y[0:m_train, :]
y_test = y[m_train:, :]

# plot the first ten digits with their labels
img_dim = int(np.sqrt(src_X.shape[1]))
images_and_labels = list(zip(src_X.reshape(m, img_dim, img_dim), src_y))
for index, (image, label) in enumerate(images_and_labels[:10]):
    plt.subplot(1, 10, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r)
    plt.title('{}'.format(label))

# In[2]:

def nonlin_sigmoid(x, deriv=False):
    # with deriv=True, x is expected to already be sigmoid(z)
    if deriv:
        return x * (1. - x)
    return 1. / (1. + np.exp(-x))

def nonlin_relu(x, deriv=False):
    if deriv:
        return 1. * (x > 0)
    return np.maximum(x, 0)

def forward(model, x):
    z1 = np.dot(model['w1'].T, x)
    a1 = model['nonlin'](z1)
    z2 = np.dot(model['w2'].T, a1)
    a2 = model['nonlin'](z2)
    return (z1, a1, z2, a2)

def backward(model, x, z1, a1, z2, a2, y):
    loss = a2 - y  # output-layer delta
    grad_w2 = np.dot(a1, loss.T)
    grad_a1 = np.dot(model['w2'], loss) * model['nonlin'](a1, deriv=True)  # hidden-layer delta
    grad_w1 = np.dot(x, grad_a1.T)
    return (loss, grad_w1, grad_w2)

def predict(model, x):
    return np.argmax(forward(model, x)[3])  # index of the largest output activation

def accuracy(model, X, y):
    # fraction of examples in X whose predicted label matches y
    correct = []
    m = X.shape[0]
    for i in range(m):
        y_hat = predict(model, X[i, :].reshape(-1, 1))
        _y = np.argmax(y[i, :].reshape(-1, 1))
        correct.append(y_hat == _y)
    return np.sum(1. * np.array(correct)) / m

def create_model(dim_input, dim_hidden, dim_output, nonlin=nonlin_sigmoid,
                 epochs=30, learning_rate=0.9):
    model = {}
    model['dims'] = (dim_input, dim_hidden, dim_output)
    model['nonlin'] = nonlin  # which non-linearity we use for the hidden & output activations
    model['w1'] = np.random.random((dim_input, dim_hidden))
    model['w2'] = np.random.random((dim_hidden, dim_output))
    model['epochs'] = epochs
    model['learning_rate'] = learning_rate
    return model

def train_model(model):
    # plain stochastic gradient descent, one example at a time
    all_examples_err = []
    for e in range(model['epochs']):
        single_example_err = []
        for i in range(m_train):
            _x = X_train[i, :].reshape(-1, 1)
            _y = y_train[i, :].reshape(-1, 1)
            z1, a1, z2, a2 = forward(model, _x)
            loss, grad_w1, grad_w2 = backward(model, _x, z1, a1, z2, a2, _y)
            model['w2'] -= model['learning_rate'] * grad_w2
            model['w1'] -= model['learning_rate'] * grad_w1
            single_example_err.append(np.square(loss).sum())  # squared error for this example
        all_examples_err.append(np.sum(single_example_err) / m_train)
    model['error'] = all_examples_err
    return model

def plot_model_error(model, label="Model"):
    f, ax = plt.subplots()
    ax.plot(model['error'])
    # accuracy() returns a fraction, so scale by 100 to report a percentage
    ax.set_title("Model-{} Train accuracy: {:.02f}%\nTest accuracy: {:.02f}%".format(
        label,
        100 * accuracy(model, X_train, y_train),
        100 * accuracy(model, X_test, y_test)))

# In[3]:

model1 = create_model(dim_input, dim_hidden, dim_output, nonlin=nonlin_sigmoid,
                      epochs=10, learning_rate=0.9)
model1 = train_model(model1)
plot_model_error(model1, "Sigmoid")

model2 = create_model(dim_input, dim_hidden, dim_output, nonlin=nonlin_relu,
                      epochs=10, learning_rate=0.05)
model2 = train_model(model2)
plot_model_error(model2, "ReLU")
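# A quick sanity check on backward() (not in the original notebook): compare its
# analytic gradient for w1 against a centered numerical difference. This sketch
# assumes the (a2 - y) output delta corresponds to a summed binary cross-entropy
# loss with sigmoid outputs, so it probes model1; cross_entropy() and
# numeric_grad_w1() are illustrative helpers, not part of the model above.

# In[ ]:

def cross_entropy(a2, y, eps=1e-12):
    # the loss whose gradient w.r.t. z2 is exactly (a2 - y) when a2 = sigmoid(z2)
    a2 = np.clip(a2, eps, 1. - eps)
    return -np.sum(y * np.log(a2) + (1. - y) * np.log(1. - a2))

def numeric_grad_w1(model, x, y, i, j, h=1e-5):
    # centered difference on a single entry of w1
    w0 = model['w1'][i, j]
    model['w1'][i, j] = w0 + h
    lp = cross_entropy(forward(model, x)[3], y)
    model['w1'][i, j] = w0 - h
    lm = cross_entropy(forward(model, x)[3], y)
    model['w1'][i, j] = w0  # restore the original weight
    return (lp - lm) / (2. * h)

_x = X_train[0, :].reshape(-1, 1)
_y = y_train[0, :].reshape(-1, 1)
z1, a1, z2, a2 = forward(model1, _x)
_, grad_w1, _ = backward(model1, _x, z1, a1, z2, a2, _y)
pairs = [(i, j) for i in (10, 20, 30, 40) for j in (0, 3, 7)]
diffs = [abs(grad_w1[i, j] - numeric_grad_w1(model1, _x, _y, i, j))
         for i, j in pairs]
print("max |analytic - numeric| over probed weights: {:.3e}".format(max(diffs)))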
# In[4]:

# "create" an arbitrary digit and predict its label
example = np.array([[ 0.,  0.,  2.,  1.,  1.,  1.,  0.,  0.],
                    [ 0., 24., 22., 22., 22., 15., 20.,  0.],
                    [ 0.,  0.,  0.,  0.,  0.,  0., 22.,  0.],
                    [ 0.,  0.,  0.,  0.,  0., 22.,  0.,  0.],
                    [ 0.,  0.,  0.,  0., 22.,  0.,  0.,  0.],
                    [ 0.,  0.,  0., 20.,  0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0., 20.,  1.,  0.,  0.,  0.],
                    [ 0.,  0., 20.,  0.,  0.,  0.,  0.,  0.]])

f, ax = plt.subplots(figsize=(4, 4))
ax.imshow(example, cmap=plt.cm.gray_r)

# scale the same way as the training data before predicting
scaled = (example / 16.).reshape(-1, 1)
print("model 1 predicts {}\nmodel 2 predicts {}".format(
    predict(model1, scaled),
    predict(model2, scaled)))
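# As a further check (not in the original notebook), a confusion matrix over the
# held-out digits in plain numpy: rows are true labels, columns are model1's
# predictions, so off-diagonal entries count misclassifications.

# In[ ]:

confusion = np.zeros((10, 10), dtype=int)
for i in range(X_test.shape[0]):
    true_label = np.argmax(y_test[i, :])
    pred_label = predict(model1, X_test[i, :].reshape(-1, 1))
    confusion[true_label, pred_label] += 1
print(confusion)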