#!/usr/bin/env python
# coding: utf-8

# This is similar to the 'NN' notebook, but uses convolutional neural networks on the 2-D images instead of ordinary (dense) neural networks on flattened arrays of the data.

# In[1]:


# Standard scientific Python imports
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics

# Load the digits dataset that every later cell works from.
digits = datasets.load_digits()

# The data we are interested in consists of 8x8 images of digits, stored in
# the `images` attribute of the dataset; the next cell looks at the first 4
# of them.  If we were working from image files instead, we could load them
# with matplotlib.pyplot.imread.  Note that every image must have the same
# size.  For these images we know which digit each one represents: it is
# given in the `target` attribute of the dataset.


# In[2]:


# Pair every image with its label, then preview the first four pairs in the
# top row of a 2x4 subplot grid (subplot positions are 1-based).
images_and_labels = list(zip(digits.images, digits.target))
for slot, (image, label) in enumerate(images_and_labels[:4], start=1):
    plt.subplot(2, 4, slot)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)


# In[3]:


# Unlike the 'NN' notebook, the images are NOT flattened here: the
# convolutional layers need each sample kept as a 2-D image, so `data` stays
# as the stack of images. Only the labels are transformed, into one-hot
# vectors to match the softmax / categorical cross-entropy model below.
try:
    # Public import path.
    from keras.utils import to_categorical
except ImportError:
    # Fallback for Keras releases that only expose it from the np_utils module.
    from keras.utils.np_utils import to_categorical
digits.target_cat = to_categorical(digits.target)

# First half of the samples is used for training, second half for testing.
n_samples = len(digits.images)
half = n_samples // 2
data = digits.images
X_train = data[:half]
X_test = data[half:]
y_train = digits.target_cat[:half]
y_test = digits.target_cat[half:]


# In[4]:


# Preview the first 15 training images on a 3x5 grid with tick labels hidden.
# `axes.flat` walks the grid row by row, so panel k shows X_train[k].
fig, axes = plt.subplots(nrows=3, ncols=5)
for k, ax in enumerate(axes.flat):
    ax.imshow(X_train[k])
    ax.set_xticklabels([])
    ax.set_yticklabels([])
plt.tight_layout()


# In[5]:


# Insert a depth-1 channel axis right after the sample axis (needed for the
# convolutions), cast to float32, and divide by 16 to scale values 0 to 1.
X_train2 = X_train[:, None, :, :].astype('float32') / 16
X_test2 = X_test[:, None, :, :].astype('float32') / 16


# Using https://elitedatascience.com/keras-tutorial-deep-learning-in-python as a guide

# In[6]:


from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D


# In[7]:


from keras.callbacks import EarlyStopping
from numpy.random import seed

# NOTE(review): this seeds NumPy's global RNG only; depending on the Keras
# backend, weight initialisation and dropout may draw from a different RNG,
# so runs may still not be exactly reproducible -- confirm for the backend in use.
seed(8)

# X_train2 carries its channel axis right after the sample axis, so EVERY
# spatial layer must be told the data is channels-first. Declaring it on the
# first layer only leaves the later layers on the backend default (normally
# channels-last), where they would treat the feature maps as image rows.
model = Sequential()
model.add(Conv2D(10, (2, 2), activation='relu', input_shape=X_train2.shape[1:],
                 data_format='channels_first'))
# Kernel size is spelled out as a tuple: the positional form `Conv2D(16, 2, 2)`
# is read as kernel_size=2, strides=2 by the Keras 2+ signature.
model.add(Conv2D(16, (2, 2), activation='relu', data_format='channels_first'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_first'))
model.add(Dropout(0.25))
model.add(Flatten())
# One softmax output per digit class, matching the one-hot encoded targets.
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Hold out 30% of the training data for validation and stop once the monitored
# validation metric has failed to improve for 2 consecutive epochs.
early_stopping = EarlyStopping(patience=2)
model.fit(X_train2, y_train, validation_split=0.3, epochs=50, callbacks=[early_stopping])


# In[8]:


from numpy import argmax, union1d
import pandas as pd #for formatting

predictions = model.predict(X_test2)
# The sklearn metrics need 1D label arrays, so collapse both the predicted
# per-class probabilities and the one-hot targets to the index of the maximum.
predictions1D = argmax(predictions, axis=1) #should have shape (899,)
y_test1D = argmax(y_test, axis=1)
report = metrics.classification_report(y_test1D, predictions1D)

# Build the per-digit table from the numeric scores rather than by splitting
# the human-formatted `report` text: the text layout differs between
# scikit-learn versions, and parsing it yields strings instead of numbers.
# The overall averages remain available in the `report` string.
labels = union1d(y_test1D, predictions1D)  # every digit seen in truth or predictions
score_names = ['precision', 'recall', 'f1-score', 'support']
scores = metrics.precision_recall_fscore_support(y_test1D, predictions1D, labels=labels)
df = pd.DataFrame(dict(zip(score_names, scores)),
                  index=pd.Index(labels, name='Digit'),
                  columns=score_names)
df


# This looks like it stabilized around 90% accuracy. 

# In[9]:


# Overall test accuracy: the fraction of test samples whose predicted digit
# matches the true digit. The vectorised mean of the boolean match array
# avoids a Python-level loop and never falls into integer division.
(predictions1D == y_test1D).mean()


# This is a little better than the other neural network but still much worse than, say, SVM, which got to 97%.

# In[ ]: