#!/usr/bin/env python
# coding: utf-8

# # 11 - Neural Networks in Keras
#
# by [Fabio A. González](http://dis.unal.edu.co/~fgonza/), Universidad Nacional de Colombia
#
# version 1.0, June 2018
#
# ## Part of the class [Applied Deep Learning](https://github.com/albahnsen/AppliedDeepLearningClass)
#
# This notebook is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License](http://creativecommons.org/licenses/by-sa/3.0/deed.en_US).

# In[1]:


import numpy as np
import pylab as pl
from sklearn.datasets import make_moons
import keras
get_ipython().run_line_magic('matplotlib', 'inline')


# Functions for plotting 2D data and decision regions
def plot_data(X, y):
    y_unique = np.unique(y)
    colors = pl.cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
    for this_y, color in zip(y_unique, colors):
        this_X = X[y == this_y]
        pl.scatter(this_X[:, 0], this_X[:, 1], c=color,
                   alpha=0.5, edgecolor='k',
                   label="Class %s" % this_y)
    pl.legend(loc="best")
    pl.title("Data")


def plot_decision_region(X, pred_fun):
    min_x = np.min(X[:, 0])
    max_x = np.max(X[:, 0])
    min_y = np.min(X[:, 1])
    max_y = np.max(X[:, 1])
    min_x = min_x - (max_x - min_x) * 0.05
    max_x = max_x + (max_x - min_x) * 0.05
    min_y = min_y - (max_y - min_y) * 0.05
    max_y = max_y + (max_y - min_y) * 0.05
    x_vals = np.linspace(min_x, max_x, 30)
    y_vals = np.linspace(min_y, max_y, 30)
    XX, YY = np.meshgrid(x_vals, y_vals)
    grid_r, grid_c = XX.shape
    ZZ = np.zeros((grid_r, grid_c))
    for i in range(grid_r):
        for j in range(grid_c):
            ZZ[i, j] = pred_fun(XX[i, j], YY[i, j])
    pl.contourf(XX, YY, ZZ, 30, cmap=pl.cm.coolwarm, vmin=0, vmax=1)
    pl.colorbar()
    pl.xlabel("x")
    pl.ylabel("y")


# ### 1. Neural network training in Keras
#
# We will build a multilayer network to solve the following classification problem:

# In[2]:


x_train, y_train = make_moons(n_samples=1000, noise=0.2, random_state=3)
pl.figure(figsize=(8, 6))
plot_data(x_train, y_train)


# We define a `Sequential` model, which is built by stacking layers. The first layer is a dense layer that receives 2 inputs and has 20 neurons with `relu` activation. The second and last layer has a single neuron with `sigmoid` activation.

# In[3]:


from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()
model.add(Dense(units=20, input_dim=2))
model.add(Activation('relu'))
model.add(Dense(units=1))
model.add(Activation('sigmoid'))


# The structure of the neural network can be visualized with:

# In[4]:


model.summary(70)


# The learning process is configured with `compile`. Here we use a binary cross-entropy loss function and an SGD optimizer. The learning process will also keep track of the accuracy.

# In[5]:


from keras.optimizers import SGD

model.compile(loss='binary_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False),
              metrics=['accuracy'])


# The model is trained by calling the `fit` function:

# In[6]:


model.fit(x_train, y_train, epochs=100, verbose=2)


# The trained model can be used to classify new samples using `predict`:

# In[7]:


print(model.predict(np.array([[0, 0]])))


# This is the decision region of the trained model:

# In[8]:


def pred_fun(x1, x2):
    xval = np.array([[x1, x2]])
    return model.predict(xval)[0, 0]


pl.figure(figsize=(8, 16/3))
plot_decision_region(x_train, pred_fun)
plot_data(x_train, y_train)
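# Since `predict` returns the sigmoid output (a probability), hard class labels are obtained by thresholding at 0.5. The following is a minimal sketch, not part of the original exercise: it generates a separate held-out set with `make_moons` (the names `x_moons_test` and `y_moons_test` are introduced here only for illustration) and reports the resulting accuracy.

# In[ ]:


# Hypothetical held-out set, generated only to illustrate thresholding the sigmoid output
x_moons_test, y_moons_test = make_moons(n_samples=300, noise=0.2, random_state=7)
probs = model.predict(x_moons_test)          # shape (300, 1), probabilities in (0, 1)
y_pred = (probs[:, 0] > 0.5).astype(int)     # threshold at 0.5 to obtain class labels
print("Held-out accuracy: %.3f" % np.mean(y_pred == y_moons_test))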
# ### 2. Training a text classifier
#
# We will create a model to classify Reuters newswires according to their topic. The original dataset is available [here](https://archive.ics.uci.edu/ml/datasets/reuters-21578+text+categorization+collection), but Keras has functions that facilitate the task of loading and preprocessing it.

# In[31]:


from keras.datasets import reuters
from keras.layers import Dropout
from keras.preprocessing.text import Tokenizer

max_words = 1000

print('Loading data...')
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
                                                         test_split=0.2)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

num_classes = np.max(y_train) + 1
print(num_classes, 'classes')


# Each instance is represented by a sequence of integers that correspond to the indices of its words.

# In[32]:


print(x_train[101])


# The word-to-index map is also provided, so it is possible to decode an instance back to text:

# In[33]:


word_index = reuters.get_word_index(path="reuters_word_index.json")
num_words = max(word_index.values()) + 1
words = [''] * num_words
for word in word_index:
    words[word_index[word]] = word
print([words[i - 2] for i in x_train[101][1:]])


# The data is encoded using a bag-of-words representation, i.e., a binary matrix of size (number of instances) x (vocabulary size):

# In[34]:


tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)


# Labels are converted to a one-hot representation:

# In[35]:


y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)


# Now we build a dense neural network with one hidden layer:

# In[37]:


model = Sequential()
model.add(Dense(256, input_shape=(max_words,)))
model.add(Activation('sigmoid'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.summary(70)


# We configure the learning task to use categorical cross-entropy as the loss and SGD as the optimizer:

# In[38]:


model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])


# Now we can train the model, holding out 10% of the training data for validation:

# In[39]:


batch_size = 32
epochs = 5

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)


# Finally, we evaluate the performance on the test set:

# In[40]:


score = model.evaluate(x_test, y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])
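# For individual newswires, `predict` returns a softmax probability distribution over the `num_classes` topics, and `np.argmax` recovers the most likely topic index. A minimal sketch, assuming the bag-of-words `x_test` and one-hot `y_test` produced above:

# In[ ]:


probs = model.predict(x_test[:5])            # shape (5, num_classes), softmax probabilities
pred_topics = np.argmax(probs, axis=1)       # most likely topic index per newswire
true_topics = np.argmax(y_test[:5], axis=1)  # decode the one-hot labels back to indices
print('Predicted topics:', pred_topics)
print('True topics:     ', true_topics)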