%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [11, 7]
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
iris = load_iris()
df=pd.DataFrame(data= np.c_[iris['data'], iris['target']],
columns= list(iris['feature_names']) + ['target'])
sns.pairplot(df,hue='target')
<seaborn.axisgrid.PairGrid at 0x1a2e795fd0>
from sklearn import tree
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(iris.data, iris.target)
tree.plot_tree(clf)
[Text(306.90000000000003, 348.81, 'X[3] <= 0.8\nentropy = 0.667\nsamples = 150\nvalue = [50, 50, 50]'), Text(259.6846153846154, 285.39, 'entropy = 0.0\nsamples = 50\nvalue = [50, 0, 0]'), Text(354.11538461538464, 285.39, 'X[3] <= 1.75\nentropy = 0.5\nsamples = 100\nvalue = [0, 50, 50]'), Text(188.8615384615385, 221.97, 'X[2] <= 4.95\nentropy = 0.168\nsamples = 54\nvalue = [0, 49, 5]'), Text(94.43076923076924, 158.55, 'X[3] <= 1.65\nentropy = 0.041\nsamples = 48\nvalue = [0, 47, 1]'), Text(47.21538461538462, 95.13, 'entropy = 0.0\nsamples = 47\nvalue = [0, 47, 0]'), Text(141.64615384615388, 95.13, 'entropy = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(283.29230769230776, 158.55, 'X[3] <= 1.55\nentropy = 0.444\nsamples = 6\nvalue = [0, 2, 4]'), Text(236.0769230769231, 95.13, 'entropy = 0.0\nsamples = 3\nvalue = [0, 0, 3]'), Text(330.50769230769237, 95.13, 'X[2] <= 5.45\nentropy = 0.444\nsamples = 3\nvalue = [0, 2, 1]'), Text(283.29230769230776, 31.710000000000036, 'entropy = 0.0\nsamples = 2\nvalue = [0, 2, 0]'), Text(377.723076923077, 31.710000000000036, 'entropy = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(519.3692307692309, 221.97, 'X[2] <= 4.85\nentropy = 0.043\nsamples = 46\nvalue = [0, 1, 45]'), Text(472.1538461538462, 158.55, 'X[1] <= 3.1\nentropy = 0.444\nsamples = 3\nvalue = [0, 1, 2]'), Text(424.9384615384616, 95.13, 'entropy = 0.0\nsamples = 2\nvalue = [0, 0, 2]'), Text(519.3692307692309, 95.13, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1, 0]'), Text(566.5846153846155, 158.55, 'entropy = 0.0\nsamples = 43\nvalue = [0, 0, 43]')]
# Parameters
n_classes = 3
plot_colors = "ryb"
plot_step = 0.02
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
[1, 2], [1, 3], [2, 3]]):
# We only take the two corresponding features
X = iris.data[:, pair]
y = iris.target
clf = tree.DecisionTreeClassifier().fit(X, y)
# Plot the decision boundary
plt.subplot(2, 3, pairidx + 1)
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
np.arange(y_min, y_max, plot_step))
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)
plt.xlabel(iris.feature_names[pair[0]])
plt.ylabel(iris.feature_names[pair[1]])
# Plot the training points
for i, color in zip(range(n_classes), plot_colors):
idx = np.where(y == i)
plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
cmap=plt.cm.RdYlBu, edgecolor='black', s=15)
plt.suptitle("Decision surface of a decision tree using paired features")
plt.legend(loc='lower right', borderpad=0, handletextpad=0)
plt.axis("tight")
plt.figure()
clf = tree.DecisionTreeClassifier().fit(iris.data, iris.target)
tree.plot_tree(clf, filled=True)
plt.show()
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def plot_confusion_matrix(y_true, y_pred, classes,
normalize=False,
title=None,
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if not title:
if normalize:
title = 'Normalized confusion matrix'
else:
title = 'Confusion matrix, without normalization'
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = classes[unique_labels(y_true, y_pred)]
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
X = iris.data
y = iris.target
class_names = iris.target_names
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
Confusion matrix, without normalization [[13 0 0] [ 0 10 6] [ 0 0 9]] Normalized confusion matrix [[1. 0. 0. ] [0. 0.62 0.38] [0. 0. 1. ]]
classifier = tree.DecisionTreeClassifier(max_depth=10)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
tree.plot_tree(classifier, filled=True)
plt.show()
Confusion matrix, without normalization [[13 0 0] [ 0 15 1] [ 0 0 9]] Normalized confusion matrix [[1. 0. 0. ] [0. 0.94 0.06] [0. 0. 1. ]]
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()
y_pred = classifier.fit(X_train, y_train).predict(X_test)
np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names,
title='Confusion matrix, without normalization')
# Plot normalized confusion matrix
plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
title='Normalized confusion matrix')
plt.show()
plt.show()
/Users/roman/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22. "10 in version 0.20 to 100 in 0.22.", FutureWarning)
Confusion matrix, without normalization [[13 0 0] [ 0 15 1] [ 0 0 9]] Normalized confusion matrix [[1. 0. 0. ] [0. 0.94 0.06] [0. 0. 1. ]]
from sklearn import datasets, svm, metrics
# The digits dataset
digits = datasets.load_digits()
# The data that we are interested in is made of 8x8 images of digits, let's
# have a look at the first 4 images, stored in the `images` attribute of the
# dataset. If we were working from image files, we could load them using
# matplotlib.pyplot.imread. Note that each image must have the same size. For these
# images, we know which digit they represent: it is given in the 'target' of
# the dataset.
images_and_labels = list(zip(digits.images, digits.target))
for index, (image, label) in enumerate(images_and_labels[:4]):
plt.subplot(2, 4, index + 1)
plt.axis('off')
plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
plt.title('Training: %i' % label)
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
#classifier = svm.SVC(gamma=0.001)
#classifier = tree.DecisionTreeClassifier()
classifier = tree.DecisionTreeClassifier()
# We learn the digits on the first half of the digits
classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])
# Now predict the value of the digit on the second half:
expected = digits.target[n_samples // 2:]
predicted = classifier.predict(data[n_samples // 2:])
print("Classification report for classifier %s:\n%s\n"
% (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
plt.subplot(2, 4, index + 5)
plt.axis('off')
plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
plt.title('Prediction: %i' % prediction)
plt.show()
plot_confusion_matrix(expected, predicted, classes=digits.target,
title='Confusion matrix, without normalization')
Classification report for classifier DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=None, splitter='best'): precision recall f1-score support 0 0.91 0.90 0.90 88 1 0.83 0.64 0.72 91 2 0.91 0.74 0.82 86 3 0.69 0.75 0.72 91 4 0.86 0.78 0.82 92 5 0.61 0.77 0.68 91 6 0.89 0.92 0.91 91 7 0.90 0.79 0.84 89 8 0.59 0.64 0.61 88 9 0.63 0.73 0.67 92 accuracy 0.77 899 macro avg 0.78 0.77 0.77 899 weighted avg 0.78 0.77 0.77 899 Confusion matrix: [[79 0 0 1 3 2 0 0 0 3] [ 0 58 3 8 2 1 0 2 6 11] [ 1 3 64 8 0 1 2 0 5 2] [ 0 3 1 68 0 2 0 4 10 3] [ 6 0 0 0 72 4 4 0 4 2] [ 0 1 0 0 1 70 0 0 4 15] [ 1 1 0 0 3 0 84 0 2 0] [ 0 1 0 6 1 8 0 70 2 1] [ 0 2 2 4 2 13 4 2 56 3] [ 0 1 0 4 0 14 0 0 6 67]]
Confusion matrix, without normalization [[79 0 0 1 3 2 0 0 0 3] [ 0 58 3 8 2 1 0 2 6 11] [ 1 3 64 8 0 1 2 0 5 2] [ 0 3 1 68 0 2 0 4 10 3] [ 6 0 0 0 72 4 4 0 4 2] [ 0 1 0 0 1 70 0 0 4 15] [ 1 1 0 0 3 0 84 0 2 0] [ 0 1 0 6 1 8 0 70 2 1] [ 0 2 2 4 2 13 4 2 56 3] [ 0 1 0 4 0 14 0 0 6 67]]
<matplotlib.axes._subplots.AxesSubplot at 0x1a2b969748>