#!/usr/bin/env python
# coding: utf-8

# # 11 - Neural Networks in Keras
#
# by [Fabio A. González](http://dis.unal.edu.co/~fgonza/), Universidad Nacional de Colombia
#
# version 1.0, June 2018
#
# ## Part of the class [Applied Deep Learning](https://github.com/albahnsen/AppliedDeepLearningClass)
#
# This notebook is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License](http://creativecommons.org/licenses/by-sa/3.0/deed.en_US).

# In[1]:


import numpy as np
import pylab as pl
from sklearn.datasets import make_moons
import keras
get_ipython().run_line_magic('matplotlib', 'inline')


# Functions for plotting 2D data and decision regions
def plot_data(X, y):
    y_unique = np.unique(y)
    colors = pl.cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
    for this_y, color in zip(y_unique, colors):
        this_X = X[y == this_y]
        pl.scatter(this_X[:, 0], this_X[:, 1], c=color,
                   alpha=0.5, edgecolor='k',
                   label="Class %s" % this_y)
    pl.legend(loc="best")
    pl.title("Data")


def plot_decision_region(X, pred_fun):
    min_x = np.min(X[:, 0])
    max_x = np.max(X[:, 0])
    min_y = np.min(X[:, 1])
    max_y = np.max(X[:, 1])
    min_x = min_x - (max_x - min_x) * 0.05
    max_x = max_x + (max_x - min_x) * 0.05
    min_y = min_y - (max_y - min_y) * 0.05
    max_y = max_y + (max_y - min_y) * 0.05
    x_vals = np.linspace(min_x, max_x, 30)
    y_vals = np.linspace(min_y, max_y, 30)
    XX, YY = np.meshgrid(x_vals, y_vals)
    grid_r, grid_c = XX.shape
    ZZ = np.zeros((grid_r, grid_c))
    for i in range(grid_r):
        for j in range(grid_c):
            ZZ[i, j] = pred_fun(XX[i, j], YY[i, j])
    pl.contourf(XX, YY, ZZ, 30, cmap=pl.cm.coolwarm, vmin=0, vmax=1)
    pl.colorbar()
    pl.xlabel("x")
    pl.ylabel("y")


# ### 1. Neural network training in Keras
#
# We will build a multilayer network to solve the following classification problem:

# In[2]:


x_train, y_train = make_moons(n_samples=1000, noise=0.2, random_state=3)
pl.figure(figsize=(8, 6))
plot_data(x_train, y_train)


# We define a `Sequential` model, which is built by stacking layers. The first layer is a dense layer that receives 2 inputs and has 20 neurons with `relu` activation. The second and last layer has a single neuron with `sigmoid` activation.

# In[3]:


from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()
model.add(Dense(units=20, input_dim=2))
model.add(Activation('relu'))
model.add(Dense(units=1))
model.add(Activation('sigmoid'))


# The structure of the neural network can be visualized with:

# In[4]:


model.summary(70)


# The learning process is configured with `compile`. Here we use a binary cross-entropy loss function and an SGD optimizer. The learning process will also keep track of the accuracy.

# In[5]:


from keras.optimizers import SGD

model.compile(loss='binary_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False),
              metrics=['accuracy'])


# The model is trained by calling the `fit` function:

# In[6]:


model.fit(x_train, y_train, epochs=100, verbose=2)


# The trained model can be used to classify new samples using `predict`:

# In[7]:


print(model.predict(np.array([[0, 0]])))


# This is the decision region of the trained model:

# In[8]:


def pred_fun(x1, x2):
    xval = np.array([[x1, x2]])
    return model.predict(xval)[0, 0]


pl.figure(figsize=(8, 16/3))
plot_decision_region(x_train, pred_fun)
plot_data(x_train, y_train)
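# Since `predict` returns the sigmoid output (a probability), hard class labels are obtained by thresholding at 0.5. The following is a minimal sketch, not part of the original exercise: it generates a separate held-out set with `make_moons` (the names `x_moons_test` and `y_moons_test` are introduced here only for illustration) and reports the resulting accuracy.

# In[ ]:


# Hypothetical held-out set, generated only to illustrate thresholding the sigmoid output
x_moons_test, y_moons_test = make_moons(n_samples=300, noise=0.2, random_state=7)
probs = model.predict(x_moons_test)          # shape (300, 1), probabilities in (0, 1)
y_pred = (probs[:, 0] > 0.5).astype(int)     # threshold at 0.5 to obtain class labels
print("Held-out accuracy: %.3f" % np.mean(y_pred == y_moons_test))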
# ### 2. Training a text classifier
#
# We will create a model to classify Reuters newswires according to their topic. The original dataset is available [here](https://archive.ics.uci.edu/ml/datasets/reuters-21578+text+categorization+collection), but Keras has functions that facilitate the task of loading and preprocessing it.

# In[31]:


from keras.datasets import reuters
from keras.layers import Dropout
from keras.preprocessing.text import Tokenizer

max_words = 1000

print('Loading data...')
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
                                                         test_split=0.2)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

num_classes = np.max(y_train) + 1
print(num_classes, 'classes')


# Each instance is represented by a sequence of integers that correspond to the indices of its words.

# In[32]:


print(x_train[101])


# The word-to-index map is also provided, so it is possible to decode an instance back to text:

# In[33]:


word_index = reuters.get_word_index(path="reuters_word_index.json")
num_words = max(word_index.values()) + 1
words = [''] * num_words
for word in word_index:
    words[word_index[word]] = word
print([words[i - 2] for i in x_train[101][1:]])


# The data is encoded using a bag-of-words representation, i.e., a binary matrix of size (number of instances) x (vocabulary size):

# In[34]:


tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)


# Labels are converted to a one-hot representation:

# In[35]:


y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)


# Now we build a dense neural network with one hidden layer:

# In[37]:


model = Sequential()
model.add(Dense(256, input_shape=(max_words,)))
model.add(Activation('sigmoid'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.summary(70)


# We configure the learning task to use categorical cross-entropy as the loss and SGD as the optimizer:

# In[38]:


model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])


# Now we can train the model, holding out 10% of the training data for validation:

# In[39]:


batch_size = 32
epochs = 5

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)


# Finally, we evaluate the performance on the test set:

# In[40]:


score = model.evaluate(x_test, y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])
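# For individual newswires, `predict` returns a softmax probability distribution over the `num_classes` topics, and `np.argmax` recovers the most likely topic index. A minimal sketch, assuming the bag-of-words `x_test` and one-hot `y_test` produced above:

# In[ ]:


probs = model.predict(x_test[:5])            # shape (5, num_classes), softmax probabilities
pred_topics = np.argmax(probs, axis=1)       # most likely topic index per newswire
true_topics = np.argmax(y_test[:5], axis=1)  # decode the one-hot labels back to indices
print('Predicted topics:', pred_topics)
print('True topics:     ', true_topics)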