%pylab inline
# Fetch the 'MNIST original' dataset via scikit-learn, using DATA_PATH as the
# loader's data_home directory.
import pylab
from sklearn.datasets import fetch_mldata
DATA_PATH = '~/data'
mnist = fetch_mldata('MNIST original', data_home=DATA_PATH)

# Bare expression: its value is only displayed when this is the last line of a
# notebook cell.
mnist.data.shape

# Basic 2-D indexing: one full row and one full column of the data array.
row = mnist.data[0,:] # First row of the array
col = mnist.data[:,0] # First column of the array

print row.shape
print col.shape

# Simple aggregate statistics over the selected row and column.
print row.sum(), row.max(), row.min()
print col.sum(), col.max(), col.min()

# Slicing: a leading block of rows and a trailing block of columns.
print mnist.data[:10,:] # First ten rows
print mnist.data[:,-10:] # Last ten columns

# Split by position: rows before index 60000 form the training set, the
# remaining rows form the test set.
train = mnist.data[:60000]
test = mnist.data[60000:]

# Placeholder to be filled in; later code passes test_sample to model.predict()
# and compares the predictions against test_labels_sample.
test_sample = None # Fix me

# A single row of the data array, printed as raw values.
img = mnist.data[0]
print img

# NOTE(review): img is still the un-reshaped row here, while imshow normally
# expects 2-D image data -- presumably this call is kept to demonstrate the
# failure before the reshaped call below; confirm.
pylab.imshow(img, cmap="Greys")

# Reinterpret the row as a 28x28 grid so it can be drawn as an image.
pylab.imshow(img.reshape(28, 28), cmap="Greys")


%%time
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm='brute').fit(train)

%%time
query_img = test[0]
_, result = model.kneighbors(query_img, n_neighbors=4)

print result

# Display several images in a row
def show(imgs, n=1):
    """Draw ``n`` images side by side in a new pylab figure.

    Args:
        imgs: When ``n == 1``, a single image; otherwise an indexable
            collection whose entries ``0 .. n-1`` are images. Each image must
            be reshapeable to 28x28.
        n: Number of images (and subplots) to draw.

    Returns:
        None. The images are drawn on a freshly created figure.
    """
    fig = pylab.figure()
    for i in range(n):
        # Subplot positions are 1-based, so image i goes into slot i + 1.
        fig.add_subplot(1, n, i + 1, xticklabels=[], yticklabels=[])
        # With a single image, imgs is the image itself rather than a collection.
        img = imgs if n == 1 else imgs[i]
        pylab.imshow(img.reshape(28, 28), cmap="Greys")

# Draw the query image, then the training images at the neighbour indices
# returned for it (one subplot per neighbour).
show(query_img)
show(train[result[0],:], len(result[0]))


# Split the labels at the same position as the data (index 60000), and keep
# every 100th test label as a small evaluation sample.
train_labels = mnist.target[:60000]
test_labels = mnist.target[60000:]
test_labels_sample = test_labels[::100]

%%time
from sklearn.neighbors import KNeighborsClassifier
# Rebinds `model`: from here on it is a 4-neighbour brute-force classifier
# fitted on the training rows and their labels.
model = KNeighborsClassifier(n_neighbors=4, algorithm='brute').fit(train, train_labels)

%%time
# Score the model!

# Predict a label for each sampled test row, then collect the positions where
# the prediction differs from the expected label.
# NOTE(review): test_sample is set to None ("Fix me") earlier in the file and
# only receives a real value further down -- confirm the intended cell order.
preds = model.predict(test_sample)
errors = [i for i in xrange(0, len(test_sample)) if preds[i] != test_labels_sample[i]]

# Exercise placeholder: the loop body does nothing yet.
for i in errors:
    pass # Visualize error image and its nearest neighbors

# Switch to a denser sample: every 10th test row together with its label.
test_sample = test[::10]
test_labels_sample = test_labels[::10]

def plot_cm(cm):
    """Render the element-wise natural logarithm of ``cm`` as a matrix plot."""
    log_cm = np.log(cm)
    pylab.matshow(log_cm)

from sklearn.metrics import confusion_matrix
# Compute and plot the confusion matrix for test_sample