import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml # MNIST data
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
# Download MNIST (OpenML dataset 554, https://www.openml.org/d/554) and turn
# both returned objects (features, labels) into plain NumPy arrays.
X, y = (
    part.to_numpy()
    for part in fetch_openml('mnist_784', version=1, return_X_y=True)
)
# Hold out 20% of the samples for testing; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Preview six consecutive training digits side by side, each titled with its label.
FIRST_PREVIEW_INDEX = 9000  # position in X_train of the first digit shown
fig, axes = plt.subplots(1, 6, figsize=(15, 5))
for offset, ax in enumerate(axes):
    index = FIRST_PREVIEW_INDEX + offset  # image number
    # Each sample is a flat vector; fold it back into a 28x28 image for display.
    pixels = np.array(X_train[index], dtype='uint8').reshape((28, 28))
    ax.set_title(f'Label is {y_train[index]}')
    ax.imshow(pixels, cmap='gray')
    # Hide the axis ticks: they carry no information for an image preview.
    ax.set_xticks(())
    ax.set_yticks(())
# Fit an unpenalized logistic regression with the 'sag' solver.
# C is the inverse regularization strength, so C=np.inf switches the penalty
# off. This replaces penalty='none', which scikit-learn deprecated in 1.2 and
# rejects from 1.4 on. multi_class='multinomial' is no longer passed either:
# it is deprecated since 1.5, and the default already selects the multinomial
# formulation for a multiclass target with this solver.
# tol=0.1 is a deliberately loose stopping tolerance.
clf = LogisticRegression(C=np.inf, solver='sag', tol=0.1)
# fit data
clf.fit(X_train, y_train)
# Score the fitted classifier on the held-out test set.
predictions = clf.predict(X_test)
# Print the per-class precision / recall / f1-score report, then the accuracy.
for metric in (classification_report, accuracy_score):
    print(metric(y_test, predictions))
# Output of the two print calls above (from one run; the split is random):
#
#               precision    recall  f1-score   support
#
#            0       0.96      0.97      0.97      1396
#            1       0.97      0.97      0.97      1552
#            2       0.90      0.90      0.90      1442
#            3       0.90      0.88      0.89      1390
#            4       0.92      0.94      0.93      1377
#            5       0.90      0.87      0.88      1312
#            6       0.95      0.96      0.95      1371
#            7       0.96      0.92      0.94      1453
#            8       0.85      0.90      0.88      1356
#            9       0.90      0.91      0.90      1351
#
#     accuracy                           0.92     14000
#    macro avg       0.92      0.92      0.92     14000
# weighted avg       0.92      0.92      0.92     14000
#
# 0.9209285714285714
import png

# Load a hand-drawn digit from disk into a (1, 28, 28, 1) uint8 buffer.
filename = "mnist_my_digit_3.png"
image = np.zeros((1, 28, 28, 1), dtype=np.uint8)
# Open the file in a context manager so the handle is always closed.
with open(filename, 'rb') as png_file:
    pngdata = png.Reader(png_file).asDirect()
    # pngdata[2] is the row iterator. Drain it here, while the file is still
    # open, because the rows may be decoded lazily.
    # NOTE(review): every row must supply exactly 28 values, i.e. the file is
    # assumed to be a 28x28 single-channel PNG — confirm for other inputs.
    for i_row, row in enumerate(pngdata[2]):
        image[0, i_row, :, 0] = row

# Show the loaded digit without axis ticks.
plt.imshow(np.squeeze(image), cmap="gray")
plt.xticks(())
plt.yticks(())
plt.show()

# one digit, -1: unspecified number determined by numpy
my_X = image.reshape((1, -1))
probabilities = clf.predict_proba(my_X)
# With a single input row, argmax over the whole array is the column index of
# the most probable class.
# NOTE(review): columns follow clf.classes_; the index equals the digit only
# if the classes are ordered 0..9 — confirm.
prediction = np.argmax(probabilities)
print(probabilities)
print(f"prediction = {prediction}")