#!/usr/bin/env python
# coding: utf-8

# Deep Learning with TensorFlow
# =============
#
# Credits: Forked from [TensorFlow](https://github.com/tensorflow/tensorflow) by Google
#
# Setup
# ------------
#
# Refer to the [setup instructions](https://github.com/donnemartin/data-science-ipython-notebooks/tree/feature/deep-learning/deep-learning/tensor-flow-exercises/README.md).
#
# Exercise 3
# ------------
#
# Previously, in `2_fullyconnected.ipynb`, you trained a logistic regression and a neural network model.
#
# The goal of this exercise is to explore regularization techniques.

# In[ ]:

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import cPickle as pickle
import numpy as np
import tensorflow as tf

# First reload the data we generated in _notmnist.ipynb_.

# In[ ]:

pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print 'Training set', train_dataset.shape, train_labels.shape
  print 'Validation set', valid_dataset.shape, valid_labels.shape
  print 'Test set', test_dataset.shape, test_labels.shape

# Reformat into a shape that's better adapted to the models we're going to train:
# - data as a flat matrix,
# - labels as float 1-hot encodings.

# In[ ]:

image_size = 28
num_labels = 10

def reformat(dataset, labels):
  # Flatten each 28x28 image into a vector of 784 float32 pixels.
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map label 1 to [0.0, 1.0, 0.0 ...], label 2 to [0.0, 0.0, 1.0 ...], etc.
  labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
  return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print 'Training set', train_dataset.shape, train_labels.shape
print 'Validation set', valid_dataset.shape, valid_labels.shape
print 'Test set', test_dataset.shape, test_labels.shape

# In[ ]:

def accuracy(predictions, labels):
  # Percentage of rows where the argmax of the prediction matches the 1-hot label.
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

# ---
# Problem 1
# ---------
#
# Introduce and tune L2 regularization for both the logistic and the neural network model. Remember that L2 regularization amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor `t` using `nn.l2_loss(t)`. The right amount of regularization should improve your validation / test accuracy.
#
# ---

# ---
# Problem 2
# ---------
#
# Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?
#
# ---

# ---
# Problem 3
# ---------
#
# Introduce dropout on the hidden layer of the neural network. Remember: dropout should only be applied during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides `nn.dropout()` for that, but you have to make sure it's only inserted during training.
#
# What happens to our extreme overfitting case?
#
# ---
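# The cell below is a minimal sketch addressing Problems 1-3, not a tuned solution:
# it adds an L2 penalty (with an assumed weight `beta` of 1e-3) and dropout (keep
# probability 0.5) to a one-hidden-layer network, and shrinking `train_subset` to a
# few batches reproduces the overfitting of Problem 2. The hidden width, learning
# rate, and step count are arbitrary choices rather than values given in the
# exercise, and the reformatted datasets and `accuracy()` from the cells above are
# assumed to be in scope. Note that the argument order of
# `softmax_cross_entropy_with_logits` and the variable-initializer name differ
# across TensorFlow versions.

# In[ ]:

batch_size = 128
num_hidden = 1024      # hidden-layer width (assumed value)
beta = 1e-3            # L2 regularization strength (assumed value)
keep_prob = 0.5        # dropout keep probability for the hidden layer
train_subset = train_labels.shape[0]  # set to e.g. 3 * batch_size for Problem 2

graph = tf.Graph()
with graph.as_default():
  # Input data: minibatch placeholders for training, constants for evaluation.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables for a 784 -> num_hidden -> 10 network.
  weights_1 = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_hidden], stddev=0.1))
  biases_1 = tf.Variable(tf.zeros([num_hidden]))
  weights_2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
  biases_2 = tf.Variable(tf.zeros([num_labels]))

  def model(data, train=False):
    # Hidden layer with ReLU; dropout only on the training path (Problem 3).
    hidden = tf.nn.relu(tf.matmul(data, weights_1) + biases_1)
    if train:
      hidden = tf.nn.dropout(hidden, keep_prob)
    return tf.matmul(hidden, weights_2) + biases_2

  logits = model(tf_train_dataset, train=True)
  # Cross-entropy plus an L2 penalty on both weight matrices (Problem 1).
  # Older TensorFlow versions take (logits, labels) positionally here.
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
  loss += beta * (tf.nn.l2_loss(weights_1) + tf.nn.l2_loss(weights_2))

  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

# In[ ]:

num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()  # tf.global_variables_initializer() in newer TF
  for step in xrange(num_steps):
    # Cycling over a small train_subset reproduces Problem 2's extreme overfitting.
    offset = (step * batch_size) % (train_subset - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
    _, l, predictions = session.run([optimizer, loss, train_prediction],
                                    feed_dict=feed_dict)
    if step % 500 == 0:
      print 'Minibatch loss at step %d: %f' % (step, l)
      print 'Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels)
      print 'Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(),
                                                     valid_labels)
  print 'Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels)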
# ---
# Problem 4
# ---------
#
# Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is [97.1%](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html?showComment=1391023266211#c8758720086795711595).
#
# One avenue you can explore is to add multiple layers.
#
# Another one is to use learning rate decay:
#
#     global_step = tf.Variable(0)  # count the number of steps taken.
#     learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
#
# ---
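# The cell below sketches one possible starting point for Problem 4: a two-hidden-
# layer network with the learning-rate decay from the snippet above wired to a
# `global_step` counter. The layer widths, starting rate of 0.5, decay schedule, and
# the `batch_size`, `beta`, and `keep_prob` values reused from the previous sketch
# are all assumed hyperparameters, not ones given in the exercise. The training-loop
# cell from the Problems 1-3 sketch can be re-run against this graph, since the
# tensor names match.

# In[ ]:

num_hidden_1 = 1024  # first hidden layer width (assumed value)
num_hidden_2 = 256   # second hidden layer width (assumed value)

graph = tf.Graph()
with graph.as_default():
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables for a 784 -> num_hidden_1 -> num_hidden_2 -> 10 network.
  weights = [
    tf.Variable(tf.truncated_normal([image_size * image_size, num_hidden_1],
                                    stddev=0.05)),
    tf.Variable(tf.truncated_normal([num_hidden_1, num_hidden_2], stddev=0.05)),
    tf.Variable(tf.truncated_normal([num_hidden_2, num_labels], stddev=0.05)),
  ]
  biases = [
    tf.Variable(tf.zeros([num_hidden_1])),
    tf.Variable(tf.zeros([num_hidden_2])),
    tf.Variable(tf.zeros([num_labels])),
  ]

  def model(data, train=False):
    # Two ReLU hidden layers; dropout on each hidden layer during training only.
    hidden = data
    for w, b in zip(weights[:-1], biases[:-1]):
      hidden = tf.nn.relu(tf.matmul(hidden, w) + b)
      if train:
        hidden = tf.nn.dropout(hidden, keep_prob)
    return tf.matmul(hidden, weights[-1]) + biases[-1]

  logits = model(tf_train_dataset, train=True)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
  loss += beta * tf.add_n([tf.nn.l2_loss(w) for w in weights])

  # Learning rate decay: start at 0.5 and halve every 1000 steps (assumed schedule).
  global_step = tf.Variable(0)  # count the number of steps taken
  learning_rate = tf.train.exponential_decay(0.5, global_step, 1000, 0.5,
                                             staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)

  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))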