#!/usr/bin/env python
# coding: utf-8

# # MNIST with SciKit-Learn and skorch
#
# This notebook shows how to define and train a simple neural network with PyTorch and use it via skorch with SciKit-Learn.

# In[1]:

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np

# ## Loading Data
# Using SciKit-Learn's ```fetch_openml``` to load MNIST data (the older ```fetch_mldata``` API has been removed from SciKit-Learn).

# In[2]:

mnist = fetch_openml('mnist_784', as_frame=False, cache=False)

# In[3]:

mnist

# In[4]:

mnist.data.shape

# ## Preprocessing Data
#
# Each image of the MNIST dataset is encoded as a 784-dimensional vector, representing a 28 x 28 pixel image. Each pixel has a value between 0 and 255, corresponding to the grey value of that pixel.
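# To get a feeling for the data, we can reshape one of the flat 784-dimensional rows back into a 28 x 28 grid and plot it. This is a minimal sketch, assuming ```matplotlib``` is installed; it is not required for the rest of the notebook.

# In[ ]:

import matplotlib.pyplot as plt

# reshape the first flat 784-vector into a 28 x 28 image
digit = mnist.data[0].reshape(28, 28)

plt.imshow(digit, cmap='gray')
plt.title('label: {}'.format(mnist.target[0]))
plt.show()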
# ```fetch_openml``` returns ```data``` and ```target``` with dtypes that PyTorch cannot use directly, so we convert ```data``` to ```float32``` and ```target``` to ```int64```.

# In[5]:

X = mnist.data.astype('float32')
y = mnist.target.astype('int64')

# As we will use ReLU as activation in combination with softmax over the output layer, we need to scale `X` down. A commonly used range is [0, 1].

# In[6]:

X /= 255.0

# In[7]:

X.min(), X.max()

# Note: the data is only scaled to the range [0, 1]; it is not normalized to zero mean and unit variance.

# In[8]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# In[9]:

assert X_train.shape[0] + X_test.shape[0] == mnist.data.shape[0]

# In[10]:

X_train.shape, y_train.shape

# ## Build Neural Network with Torch
# A simple, fully connected neural network with one hidden layer. The input layer has 784 dimensions (28x28), the hidden layer 98 (= 784 / 8), and the output layer 10 neurons, representing the digits 0 - 9.

# In[11]:

import torch
from torch import nn
import torch.nn.functional as F

# In[12]:

torch.manual_seed(0);

# In[13]:

mnist_dim = X.shape[1]
hidden_dim = int(mnist_dim / 8)
output_dim = len(np.unique(mnist.target))

# In[14]:

mnist_dim, hidden_dim, output_dim

# A neural network defined in PyTorch's framework.

# In[15]:

class ClassifierModule(nn.Module):
    def __init__(
            self,
            input_dim=mnist_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            dropout=0.5,
    ):
        super(ClassifierModule, self).__init__()
        self.dropout = nn.Dropout(dropout)

        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, X, **kwargs):
        X = F.relu(self.hidden(X))
        X = self.dropout(X)
        X = F.softmax(self.output(X), dim=-1)
        return X

# skorch allows you to use PyTorch networks in the SciKit-Learn setting. ```NeuralNetClassifier``` is imported from the top-level ```skorch``` package.

# In[16]:

from skorch import NeuralNetClassifier

# In[17]:

net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
    # device='cuda',  # uncomment this to train with CUDA
)

# In[18]:

net.fit(X_train, y_train);

# ## Prediction

# In[19]:

predicted = net.predict(X_test)

# In[20]:

np.mean(predicted == y_test)

# An accuracy of nearly 96% for a network with only one hidden layer is not too bad.

# # Convolutional Network
# PyTorch expects a 4-dimensional tensor as input to its 2D convolution layers. The dimensions represent:
# * Batch size
# * Number of channels
# * Height
# * Width
#
# The first dimension holds the batch of examples. MNIST data has only one channel and, as stated above, each MNIST vector represents a 28x28 pixel image. Hence, the tensor passed to PyTorch needs to have the shape (x, 1, 28, 28); a quick sanity check of these shapes follows below.
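# Before reshaping the full dataset, we can verify the expected shapes on a single dummy example. This is a minimal sketch; ```dummy``` and ```conv``` are throwaway names used only for illustration.

# In[ ]:

# one fake grey-scale image: batch size 1, 1 channel, 28 x 28 pixels
dummy = torch.zeros(1, 1, 28, 28)

# a conv layer with the same settings as the first layer of the CNN below
conv = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)

# a 3x3 kernel without padding shrinks each spatial dimension by 2
conv(dummy).shape  # torch.Size([1, 32, 26, 26])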
# In[21]:

XCnn = X.reshape(-1, 1, 28, 28)

# In[22]:

XCnn.shape

# In[23]:

XCnn_train, XCnn_test, y_train, y_test = train_test_split(XCnn, y, test_size=0.25, random_state=42)

# In[24]:

XCnn_train.shape, y_train.shape

# In[25]:

class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(1600, 128)  # 1600 = channels (64) * height (5) * width (5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))  # 28x28 -> conv 26x26 -> pool 13x13
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))  # 13x13 -> conv 11x11 -> pool 5x5
        x = x.view(-1, x.size(1) * x.size(2) * x.size(3))  # flatten over channels, height and width = 1600
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = F.softmax(x, dim=-1)
        return x

# In[26]:

cnn = NeuralNetClassifier(
    Cnn,
    max_epochs=15,
    lr=1,
    optimizer=torch.optim.Adadelta,
    # device='cuda',  # uncomment this to train with CUDA
)

# In[27]:

cnn.fit(XCnn_train, y_train);

# In[28]:

cnn_pred = cnn.predict(XCnn_test)

# In[29]:

np.mean(cnn_pred == y_test)

# An accuracy of 99.1% should suffice for this example!
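# Since skorch estimators implement the SciKit-Learn API, they also compose with tools like ```GridSearchCV```. The following is a minimal sketch using the fully connected net from above; the parameter grid and the small training subset are arbitrary choices made to keep the runtime down.

# In[ ]:

from sklearn.model_selection import GridSearchCV

# double-underscore routing: 'module__dropout' is passed on to ClassifierModule
params = {
    'lr': [0.05, 0.1],
    'module__dropout': [0.3, 0.5],
}

gs = GridSearchCV(net, params, refit=False, cv=2, scoring='accuracy')
gs.fit(X_train[:5000], y_train[:5000])
gs.best_params_, gs.best_score_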