%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import scipy.io
# Load the MATLAB dataset; the rest of the script reads its 'X' and 'y' entries.
data = scipy.io.loadmat('ex3data1.mat')

# Pick up to 100 random handwriting samples and draw them on a 10x10 grid.
import random

GRID_SIDE = 10
n_samples = data['X'].shape[0]
# Take the sampling range from the data instead of hard-coding 5000 rows, and
# cap the count so a dataset with fewer than 100 rows does not make
# random.sample raise ValueError.
indexes = random.sample(range(n_samples), min(GRID_SIDE * GRID_SIDE, n_samples))
figure = plt.figure(figsize=(10, 10))
for index, i in enumerate(indexes):
    # Subplot positions are 1-based, hence the +1.
    plt.subplot(GRID_SIDE, GRID_SIDE, index + 1)
    plt.axis('off')
    # Each row of X is viewed as a 20x20 image. The transpose presumably
    # compensates for MATLAB's column-major pixel order -- TODO confirm.
    plt.imshow(data['X'][i].reshape(20, 20).transpose(), cmap='Greys')
plt.show()
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
# One-vs-rest wrapper around an L2-penalised logistic regression (C=1).
clf = OneVsRestClassifier(LogisticRegression(penalty='l2', C=1))
# scipy.io.loadmat returns arrays that are at least 2-D by default, so flatten
# the labels to the 1-D shape scikit-learn expects for a single-label target.
# Assumes 'y' holds exactly one label per row -- confirm against the dataset.
labels = data['y'].ravel()
clf.fit(data['X'], labels)
# NOTE: this scores the classifier on the same data it was fitted on, so the
# number is training accuracy, not a held-out estimate.
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(clf.score(data['X'], labels))
0.9438
/usr/local/lib/python2.7/site-packages/sklearn/utils/__init__.py:93: DeprecationWarning: Function multilabel_ is deprecated; Attribute multilabel_ is deprecated and will be removed in 0.17. Use 'y_type_.startswith('multilabel')' instead warnings.warn(msg, category=DeprecationWarning)