জুপিটার নোটবুকের লিংক https://github.com/raqueeb/ml-python/blob/master/model-evaluation3.ipynb
ডাউনলোড করে নিন নিজের ব্যবহারের জন্য, ধারণার জন্য ধন্যবাদ কেভিন মার্খামকে। ডেটাস্কুল।
নেইবারের সংখ্যা ৩ থেকে ৫ করার পর অ্যাক্যুরেসি 0.95 থেকে 0.96 হয়েছে। এখন নেইবারের সংখ্যা বার বার পাল্টে দেখা যেতে পারে কোথায় তার অ্যাক্যুরেসি সবচেয়ে বেশি। ম্যানুয়ালি না করে ফেলে দেই প্রোগ্রামিং লুপে। সেই বের করে দেবে কোথায় অ্যাক্যুরেসি ভালো।
# Bring in the pieces from the earlier sections
# Start by loading the iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
# The features and the target response go into X and y
X = iris.data
y = iris.target
# STEP 1: split X and y into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
# Run K from 1 through 25 and record the testing accuracy for every K
neighbors_settings = list(range(1, 26))
scores = []
for k in neighbors_settings:
    # Fit a fresh classifier for this K on the training split only
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    # Score it on the held-out split; scores[i] lines up with neighbors_settings[i]
    y_pred = knn.predict(X_test)
    scores.append(metrics.accuracy_score(y_test, y_pred))
ম্যানুয়ালি না দেখে neighbors_settings এবং scoresকে x, y এক্সিসে প্লট করি। ভিজ্যুয়ালাইজেশন ইজ দ্য কিং!
# Import Matplotlib (our scientific plotting library)
import matplotlib.pyplot as plt
# Render the plots inline inside our Jupyter notebook
%matplotlib inline
# Plot the relationship between K and the "testing accuracy"
plt.plot(neighbors_settings, scores)
# Axis labels
plt.xlabel('Value of K for KNN')
plt.ylabel('Testing Accuracy')
Text(0,0.5,'Testing Accuracy')
# The plot shows good accuracy for K from 7 through 17, so test with K=10
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
# Print the accuracy of the K=10 model on the held-out test split
print(metrics.accuracy_score(y_test, y_pred))
0.983333333333
কী বুঝলাম এখানে?
"কে-নিয়ারেস্ট নেইবার্স" মডেলের কমপ্লেক্সিটি নির্ভর করছে K এর ভ্যালুর ওপর। ভ্যালু কম হলে কমপ্লেক্সিটি বেশি।
মডেলের ট্রেনিং অ্যাক্যুরেসি বাড়ে মডেলের কমপ্লেক্সিটি বাড়লে।
টেস্টিং অ্যাক্যুরেসি পেনাল্টি করে মডেল খুব বেশি কমপ্লেক্স অথবা খুব সহজ হয়ে গেলে।
# Settle on 10 neighbors and train on the whole dataset (X and y),
# not on the X_train / y_train split
knn = KNeighborsClassifier(n_neighbors=10).fit(X, y)
# Predict the class of a new sample
knn.predict([[3, 5, 4, 2]])
array([1])
অথবা, যদি সরাসরি জানতে চাই আইরিস প্রজাতির নামটা?
# Look up the name that corresponds to the predicted class index
print("Predicted target name:", iris.target_names[knn.predict([[3, 5, 4, 2]])])
Predicted target name: ['versicolor']
মনে আছে "মডেলের জেনারেলাইজেশন, ওভার-ফিটিং এবং আন্ডার-ফিটিং" চ্যাপ্টারের আলাপগুলোর কথা? আমরা একটা সম্পর্ক বের করতে চাচ্ছিলাম মডেল কমপ্লেক্সিটির সাথে 'জেনেরালাইজেশন' নিয়ে। এখানে আমরা আইরিস ডেটাসেটের ট্রেনিং আর টেস্ট ডেটাসেটের পারফরম্যান্স দেখি "কে-নিয়ারেস্ট নেইবার্স" এর নেইবারের সংখ্যা নিয়ে।
এখানে ট্রেনিং এবং টেস্ট সেটের অ্যাক্যুরেসি দেখতে 'অ্যাক্যুরেসি' ফেলেছি ওয়াই এক্সিসে। "কে-নিয়ারেস্ট নেইবার্স" এর নেইবারের সংখ্যাকে দেখানো হয়েছে এক্স এক্সিসে। মনে আছে তো কম নেইবার মানে বেশি কমপ্লেক্স মডেল? একটা নেইবার নিয়ে ট্রেনিং সেট একদম পারফেক্ট। যখন নেইবার বাড়ছে, মডেল আস্তে আস্তে সিম্পলার মানে সহজ হচ্ছে। ফলে অ্যাক্যুরেসি কমছে।
টেস্ট সেটের অ্যাক্যুরেসি কিন্তু কম একটা নেইবারে। তবে বেশি নেইবার হওয়াতে সেটা আরো কমছে। একটা নেইবারে মডেল অনেক কমপ্লেক্স তবে যতো বেশি নেইবার বাড়ছে অ্যাক্যুরেসি কমছে। তবে মাঝামাঝি জায়গায় মডেল ভালো করছে।
%matplotlib inline
# Same story as before; only the range of n_neighbors is widened - up to 25
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target
from sklearn.model_selection import train_test_split
# No test_size is passed here, so the library default split size applies;
# the split is stratified on y and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=4)
from sklearn.neighbors import KNeighborsClassifier
training_accuracy = []
test_accuracy = []
# try n_neighbors from 1 to 25
neighbors_settings = range(1, 26)
for n_neighbors in neighbors_settings:
    # build the model
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train)
    # record training set accuracy
    training_accuracy.append(knn.score(X_train, y_train))
    # record generalization accuracy
    test_accuracy.append(knn.score(X_test, y_test))
# The plot below shows the training and test set accuracy
import matplotlib.pyplot as plt
# One curve per split, both drawn against the same n_neighbors values
plt.plot(neighbors_settings, training_accuracy, label="training accuracy")
plt.plot(neighbors_settings, test_accuracy, label="test accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Numbers of neighbors")
# The legend uses the label= strings given to plt.plot above
plt.legend()
<matplotlib.legend.Legend at 0x1674dee9d30>