import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset source: https://www.kaggle.com/rubenssjr/brasilian-houses-to-rent
df = pd.DataFrame(pd.read_csv('sample_data/houses_to_rent_v2.csv'))
display(df.head())
display(df.columns)
display(df.shape)
# Observe the proportion of each class in the dataset
display(100 * (df.groupby('city').count() / int(len(df.city))))
# Observe the count for each class in the dataset
display(df.groupby('city').count())
city | area | rooms | bathroom | parking spaces | floor | animal | furniture | hoa (R$) | rent amount (R$) | property tax (R$) | fire insurance (R$) | total (R$) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | São Paulo | 70 | 2 | 1 | 1 | 7 | acept | furnished | 2065 | 3300 | 211 | 42 | 5618 |
1 | São Paulo | 320 | 4 | 4 | 0 | 20 | acept | not furnished | 1200 | 4960 | 1750 | 63 | 7973 |
2 | Porto Alegre | 80 | 1 | 1 | 1 | 6 | acept | not furnished | 1000 | 2800 | 0 | 41 | 3841 |
3 | Porto Alegre | 51 | 2 | 1 | 0 | 2 | acept | not furnished | 270 | 1112 | 22 | 17 | 1421 |
4 | São Paulo | 25 | 1 | 1 | 0 | 1 | not acept | not furnished | 0 | 800 | 25 | 11 | 836 |
Index(['city', 'area', 'rooms', 'bathroom', 'parking spaces', 'floor', 'animal', 'furniture', 'hoa (R$)', 'rent amount (R$)', 'property tax (R$)', 'fire insurance (R$)', 'total (R$)'], dtype='object')
(10692, 13)
area | rooms | bathroom | parking spaces | floor | animal | furniture | hoa (R$) | rent amount (R$) | property tax (R$) | fire insurance (R$) | total (R$) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
city | ||||||||||||
Belo Horizonte | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 | 11.765806 |
Campinas | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 | 7.977927 |
Porto Alegre | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 | 11.157875 |
Rio de Janeiro | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 | 14.038533 |
São Paulo | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 | 55.059858 |
area | rooms | bathroom | parking spaces | floor | animal | furniture | hoa (R$) | rent amount (R$) | property tax (R$) | fire insurance (R$) | total (R$) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
city | ||||||||||||
Belo Horizonte | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 | 1258 |
Campinas | 853 | 853 | 853 | 853 | 853 | 853 | 853 | 853 | 853 | 853 | 853 | 853 |
Porto Alegre | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 | 1193 |
Rio de Janeiro | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 | 1501 |
São Paulo | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 | 5887 |
# Assign a unique integer for each label (city)
df.city = pd.factorize(df['city'])[0]
# Convert boolean strings to 1 and 0
df.animal = pd.Series(np.where(df.animal == 'acept', 1, 0))
df.furniture = pd.Series(np.where(df.furniture == 'furnished', 1, 0))
# Fix inconsistency in column and convert the field to a numeric type
df.floor = df[df.floor == '-'] = 0
df.floor = pd.to_numeric(df.floor)
/home/hp-nunes/anaconda3/envs/TEST/lib/python3.7/site-packages/pandas/core/ops/array_ops.py:253: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison res_values = method(rvalues)
# split our dataset between attributes and labels.
X = df.iloc[:, 1:-1].values # Excludes 'city' & 'total(R$)'
y = df.iloc[:, 0].values # Includes 'city' only
display(df.head())
df.dtypes
city | area | rooms | bathroom | parking spaces | floor | animal | furniture | hoa (R$) | rent amount (R$) | property tax (R$) | fire insurance (R$) | total (R$) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 70 | 2 | 1 | 1 | 0 | 1 | 1 | 2065 | 3300 | 211 | 42 | 5618 |
1 | 0 | 320 | 4 | 4 | 0 | 0 | 1 | 0 | 1200 | 4960 | 1750 | 63 | 7973 |
2 | 1 | 80 | 1 | 1 | 1 | 0 | 1 | 0 | 1000 | 2800 | 0 | 41 | 3841 |
3 | 1 | 51 | 2 | 1 | 0 | 0 | 1 | 0 | 270 | 1112 | 22 | 17 | 1421 |
4 | 0 | 25 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 800 | 25 | 11 | 836 |
city int64 area int64 rooms int64 bathroom int64 parking spaces int64 floor int64 animal int64 furniture int64 hoa (R$) int64 rent amount (R$) int64 property tax (R$) int64 fire insurance (R$) int64 total (R$) int64 dtype: object
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler() # Scale and transform before splitting into training/testing
X = scaler.fit_transform(X)
from sklearn.model_selection import train_test_split
# The test size is set to 20% of the whole dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0,test_size=0.20)
print(X_train.shape, X_test.shape)
(8553, 11) (2139, 11)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
# Create an instance
knn = KNeighborsClassifier(n_neighbors=6)
# Train the algorithm
model=knn.fit(X_train, y_train)
# Predict the classes on the testing set
y_pred = model.predict(X_test)
# Get the accuracy score on the testing set
print(metrics.accuracy_score(y_test, y_pred))
0.6161757830762038
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred)) # Precision should be large, but recall should be "smaller"
precision recall f1-score support 0 0.68 0.87 0.76 1198 1 0.55 0.41 0.47 238 2 0.45 0.28 0.35 299 3 0.38 0.25 0.30 159 4 0.40 0.20 0.27 245 accuracy 0.62 2139 macro avg 0.49 0.40 0.43 2139 weighted avg 0.58 0.62 0.58 2139
#Choose how many neighbors to test
k_range = range(1,300)
#Create a list to store scores
scores=[]
error = []
#Run the KNN
for k in k_range:
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
scores.append(metrics.accuracy_score(y_test, y_pred)) # Append the accuracy score
error.append(np.mean(y_pred != y_test)) # Append the error rate
#Print the scores
print(scores)
[0.5885928003740065, 0.6194483403459561, 0.6068256194483403, 0.6115007012622721, 0.6091631603553063, 0.6161757830762038, 0.6119682094436653, 0.6231884057971014, 0.6147732585320244, 0.6152407667134174, 0.6161757830762038, 0.6157082748948106, 0.6105656848994857, 0.6110331930808789, 0.605890603085554, 0.6133707339878448, 0.6115007012622721, 0.6105656848994857, 0.6077606358111267, 0.6044880785413744, 0.5993454885460495, 0.6044880785413744, 0.603085553997195, 0.6040205703599812, 0.6044880785413744, 0.6068256194483403, 0.6016830294530154, 0.6012155212716223, 0.6012155212716223, 0.6021505376344086, 0.6054230949041608, 0.6026180458158018, 0.6012155212716223, 0.5993454885460495, 0.5965404394576905, 0.5960729312762973, 0.5974754558204769, 0.5970079476390837, 0.5970079476390837, 0.5984104721832632, 0.5988779803646563, 0.5979429640018701, 0.5965404394576905, 0.5988779803646563, 0.5998129967274427, 0.5998129967274427, 0.5984104721832632, 0.5965404394576905, 0.595137914913511, 0.5956054230949042, 0.5956054230949042, 0.5928003740065451, 0.5913978494623656, 0.5899953249181861, 0.5899953249181861, 0.5909303412809724, 0.5885928003740065, 0.5895278167367929, 0.5881252921926133, 0.5867227676484339, 0.5862552594670407, 0.5853202431042543, 0.587190275829827, 0.5857877512856475, 0.5867227676484339, 0.5853202431042543, 0.5881252921926133, 0.5867227676484339, 0.5890603085553997, 0.5895278167367929, 0.5881252921926133, 0.5876577840112202, 0.5881252921926133, 0.5899953249181861, 0.5876577840112202, 0.5867227676484339, 0.5853202431042543, 0.5853202431042543, 0.5862552594670407, 0.5857877512856475, 0.5867227676484339, 0.584385226741468, 0.5834502103786816, 0.5825151940158952, 0.5825151940158952, 0.5839177185600748, 0.5825151940158952, 0.5829827021972884, 0.5834502103786816, 0.5853202431042543, 0.5848527349228612, 0.5853202431042543, 0.5848527349228612, 0.584385226741468, 0.5839177185600748, 0.584385226741468, 0.5848527349228612, 0.5853202431042543, 0.5848527349228612, 0.5848527349228612, 0.584385226741468, 0.5848527349228612, 0.5885928003740065, 0.5890603085553997, 0.5876577840112202, 0.5890603085553997, 0.5853202431042543, 0.5857877512856475, 0.5853202431042543, 0.5834502103786816, 0.5815801776531089, 0.5825151940158952, 0.5815801776531089, 0.5829827021972884, 0.5806451612903226, 0.5801776531089294, 0.5820476858345021, 0.5801776531089294, 0.5797101449275363, 0.5792426367461431, 0.5787751285647499, 0.5783076203833567, 0.5759700794763908, 0.576437587657784, 0.5726975222066386, 0.5731650303880318, 0.5741000467508182, 0.5745675549322113, 0.5797101449275363, 0.5792426367461431, 0.5806451612903226, 0.5797101449275363, 0.5792426367461431, 0.5797101449275363, 0.5825151940158952, 0.5815801776531089, 0.5815801776531089, 0.5815801776531089, 0.5829827021972884, 0.5829827021972884, 0.5778401122019635, 0.5778401122019635, 0.5769050958391772, 0.5787751285647499, 0.5801776531089294, 0.5811126694717158, 0.5792426367461431, 0.5806451612903226, 0.5811126694717158, 0.5787751285647499, 0.5806451612903226, 0.5811126694717158, 0.5801776531089294, 0.5801776531089294, 0.5815801776531089, 0.5806451612903226, 0.5820476858345021, 0.5820476858345021, 0.5820476858345021, 0.5825151940158952, 0.5829827021972884, 0.5815801776531089, 0.5825151940158952, 0.5825151940158952, 0.5815801776531089, 0.5815801776531089, 0.5820476858345021, 0.5815801776531089, 0.5811126694717158, 0.5801776531089294, 0.5797101449275363, 0.5820476858345021, 0.5787751285647499, 0.5797101449275363, 0.5797101449275363, 0.5801776531089294, 0.5801776531089294, 0.5806451612903226, 0.5806451612903226, 0.5815801776531089, 0.5820476858345021, 0.5820476858345021, 0.5811126694717158, 0.5820476858345021, 0.5820476858345021, 0.5811126694717158, 0.5825151940158952, 0.5825151940158952, 0.5829827021972884, 0.5820476858345021, 0.5820476858345021, 0.5820476858345021, 0.5815801776531089, 0.5806451612903226, 0.5815801776531089, 0.5811126694717158, 0.5806451612903226, 0.5811126694717158, 0.5820476858345021, 0.5815801776531089, 0.5806451612903226, 0.5806451612903226, 0.5797101449275363, 0.5792426367461431, 0.5792426367461431, 0.5797101449275363, 0.5792426367461431, 0.5783076203833567, 0.5797101449275363, 0.5792426367461431, 0.5801776531089294, 0.5787751285647499, 0.5783076203833567, 0.5778401122019635, 0.5783076203833567, 0.5787751285647499, 0.5787751285647499, 0.5787751285647499, 0.5792426367461431, 0.5792426367461431, 0.5797101449275363, 0.5792426367461431, 0.5787751285647499, 0.5778401122019635, 0.5801776531089294, 0.5806451612903226, 0.5811126694717158, 0.5811126694717158, 0.5815801776531089, 0.5801776531089294, 0.5792426367461431, 0.5792426367461431, 0.5801776531089294, 0.5797101449275363, 0.5801776531089294, 0.5787751285647499, 0.5759700794763908, 0.5759700794763908, 0.573632538569425, 0.5773726040205703, 0.5741000467508182, 0.5778401122019635, 0.5787751285647499, 0.5778401122019635, 0.5750350631136045, 0.5750350631136045, 0.576437587657784, 0.5759700794763908, 0.5755025712949977, 0.5759700794763908, 0.5745675549322113, 0.5745675549322113, 0.5741000467508182, 0.5741000467508182, 0.5741000467508182, 0.5745675549322113, 0.5750350631136045, 0.5755025712949977, 0.5745675549322113, 0.5726975222066386, 0.5717625058438522, 0.5708274894810659, 0.5698924731182796, 0.5703599812996727, 0.5708274894810659, 0.5684899485741001, 0.5703599812996727, 0.5698924731182796, 0.5694249649368864, 0.5689574567554933, 0.5680224403927069, 0.5661524076671341, 0.5661524076671341, 0.5652173913043478, 0.565684899485741, 0.5670874240299205, 0.5666199158485273, 0.5661524076671341, 0.5675549322113137, 0.5670874240299205, 0.5670874240299205, 0.5689574567554933, 0.5675549322113137, 0.565684899485741, 0.5652173913043478, 0.5652173913043478, 0.5670874240299205, 0.5684899485741001, 0.5670874240299205, 0.5703599812996727, 0.5703599812996727, 0.5689574567554933, 0.5661524076671341, 0.562879850397382, 0.5642823749415615, 0.5624123422159888, 0.562879850397382, 0.5624123422159888, 0.5619448340345956]
plt.figure(figsize=(50,20))
plt.plot(k_range, scores)
plt.axis([1,25,min(scores),1])
plt.xticks(k_range)
plt.yticks(fontsize=30)
plt.xlabel('Value of K',fontsize=35)
plt.ylabel('Test accuracy',fontsize=35)
plt.xlim(0, 300, 10)
plt.ylim(0.56, .63)
plt.show()
plt.figure(figsize=(12, 6))
plt.plot(range(1, 300), error, color='red', linestyle='dashed', marker='o',
markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
Text(0, 0.5, 'Mean Error')
Compare the model's performance between one single label for the predominating class (i.e. city = 'Sao Paulo') versus all other classes minus the majority class.
df = pd.DataFrame(pd.read_csv('sample_data/houses_to_rent_v2.csv'))
df.animal = pd.Series(np.where(df.animal == 'acept', 1, 0))
df.furniture = pd.Series(np.where(df.furniture == 'furnished', 1, 0))
df.floor = df[df.floor == '-'] = 0
df.floor = pd.to_numeric(df.floor)
/home/hp-nunes/anaconda3/envs/TEST/lib/python3.7/site-packages/pandas/core/ops/array_ops.py:253: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison res_values = method(rvalues)
df.city = pd.Series(np.where(df.city == 'São Paulo',1,0))
# # split our dataset into its attributes and labels.
X = df.iloc[:, 1:-1].values # Excludes 'city'
y = df.iloc[:, 0].values # Includes 'city' only
from sklearn.preprocessing import StandardScaler # Scale and transform before splitting into training/testing
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0,test_size=0.20)
print(X_train.shape, X_test.shape)
(8553, 11) (2139, 11)
#Create an instance
knn = KNeighborsClassifier(n_neighbors=6)
#Train the algorithm
model=knn.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))
0.68630201028518
#Choose how many neighbors to test
k_range = range(1,300)
#Create a list to store scores
scores=[]
error = []
#Run the KNN
for k in k_range:
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
scores.append(metrics.accuracy_score(y_test, y_pred)) # Append the accuracy score
error.append(np.mean(y_pred != y_test)) # Append the error rate
#Print the scores
print(scores)
[0.689107059373539, 0.6418887330528285, 0.6834969611968209, 0.6708742402992053, 0.7017297802711547, 0.68630201028518, 0.7073398784478728, 0.6947171575502571, 0.7101449275362319, 0.7003272557269752, 0.7031323048153343, 0.6937821411874708, 0.6975222066386162, 0.697054698457223, 0.697054698457223, 0.6919121084618981, 0.6984572230014026, 0.6956521739130435, 0.6965871902758298, 0.6993922393641889, 0.6919121084618981, 0.6965871902758298, 0.6993922393641889, 0.6993922393641889, 0.6993922393641889, 0.6989247311827957, 0.6984572230014026, 0.7017297802711547, 0.7012622720897616, 0.6961196820944366, 0.6965871902758298, 0.6984572230014026, 0.6965871902758298, 0.7012622720897616, 0.6975222066386162, 0.6956521739130435, 0.6951846657316503, 0.6942496493688639, 0.6965871902758298, 0.697054698457223, 0.6993922393641889, 0.6984572230014026, 0.7007947639083684, 0.7007947639083684, 0.6989247311827957, 0.697054698457223, 0.6961196820944366, 0.6942496493688639, 0.6923796166432913, 0.6961196820944366, 0.6937821411874708, 0.6933146330060777, 0.6965871902758298, 0.6961196820944366, 0.6989247311827957, 0.6947171575502571, 0.6928471248246845, 0.6933146330060777, 0.6919121084618981, 0.6909770920991117, 0.6933146330060777, 0.689107059373539, 0.6872370266479664, 0.6858345021037868, 0.689107059373539, 0.6900420757363254, 0.6895745675549322, 0.6881720430107527, 0.68630201028518, 0.6905095839177186, 0.6867695184665732, 0.6886395511921458, 0.6872370266479664, 0.689107059373539, 0.6877045348293596, 0.6900420757363254, 0.68630201028518, 0.6872370266479664, 0.689107059373539, 0.689107059373539, 0.6867695184665732, 0.6858345021037868, 0.6839644693782141, 0.6867695184665732, 0.6867695184665732, 0.6858345021037868, 0.6848994857410005, 0.6848994857410005, 0.6858345021037868, 0.6858345021037868, 0.6816269284712483, 0.6806919121084619, 0.6853669939223936, 0.6830294530154277, 0.6853669939223936, 0.6834969611968209, 0.6820944366526415, 0.6830294530154277, 0.6825619448340347, 0.6825619448340347, 0.6834969611968209, 0.6806919121084619, 0.6806919121084619, 0.6788218793828892, 0.6792893875642824, 0.6816269284712483, 0.6825619448340347, 0.6825619448340347, 0.6816269284712483, 0.6806919121084619, 0.6806919121084619, 0.6825619448340347, 0.6825619448340347, 0.6858345021037868, 0.6839644693782141, 0.6886395511921458, 0.6858345021037868, 0.6872370266479664, 0.6853669939223936, 0.6877045348293596, 0.6844319775596073, 0.6830294530154277, 0.6811594202898551, 0.6825619448340347, 0.6802244039270687, 0.6811594202898551, 0.6806919121084619, 0.6816269284712483, 0.6820944366526415, 0.6764843384759234, 0.6778868630201028, 0.6746143057503506, 0.6797568957456755, 0.6778868630201028, 0.6778868630201028, 0.6774193548387096, 0.6792893875642824, 0.6778868630201028, 0.6792893875642824, 0.6802244039270687, 0.6820944366526415, 0.6806919121084619, 0.6816269284712483, 0.6806919121084619, 0.6830294530154277, 0.6839644693782141, 0.6811594202898551, 0.6825619448340347, 0.6834969611968209, 0.6830294530154277, 0.6806919121084619, 0.675549322113137, 0.6746143057503506, 0.6750818139317438, 0.675549322113137, 0.6736792893875643, 0.6727442730247779, 0.6760168302945302, 0.6750818139317438, 0.675549322113137, 0.6746143057503506, 0.6736792893875643, 0.6732117812061711, 0.6732117812061711, 0.6718092566619915, 0.6732117812061711, 0.6732117812061711, 0.6718092566619915, 0.6713417484805985, 0.6694717157550257, 0.6736792893875643, 0.6722767648433847, 0.6750818139317438, 0.6746143057503506, 0.6746143057503506, 0.6769518466573166, 0.6746143057503506, 0.6769518466573166, 0.6774193548387096, 0.6802244039270687, 0.6769518466573166, 0.6769518466573166, 0.6778868630201028, 0.6769518466573166, 0.6760168302945302, 0.6764843384759234, 0.6760168302945302, 0.6741467975689575, 0.6732117812061711, 0.6732117812061711, 0.6718092566619915, 0.6722767648433847, 0.6722767648433847, 0.6713417484805985, 0.6727442730247779, 0.6718092566619915, 0.6732117812061711, 0.6727442730247779, 0.6718092566619915, 0.6704067321178121, 0.6727442730247779, 0.6722767648433847, 0.6708742402992053, 0.6685366993922394, 0.667601683029453, 0.6694717157550257, 0.6732117812061711, 0.6704067321178121, 0.6736792893875643, 0.6732117812061711, 0.6746143057503506, 0.6736792893875643, 0.6736792893875643, 0.6741467975689575, 0.675549322113137, 0.6708742402992053, 0.6736792893875643, 0.6718092566619915, 0.6732117812061711, 0.6694717157550257, 0.6722767648433847, 0.6694717157550257, 0.6736792893875643, 0.6741467975689575, 0.6736792893875643, 0.6741467975689575, 0.6708742402992053, 0.6704067321178121, 0.6718092566619915, 0.6718092566619915, 0.6741467975689575, 0.6694717157550257, 0.6760168302945302, 0.6699392239364189, 0.6736792893875643, 0.6690042075736325, 0.6741467975689575, 0.6708742402992053, 0.6769518466573166, 0.6708742402992053, 0.6764843384759234, 0.6699392239364189, 0.6746143057503506, 0.6708742402992053, 0.6746143057503506, 0.6732117812061711, 0.6778868630201028, 0.6760168302945302, 0.675549322113137, 0.6746143057503506, 0.6699392239364189, 0.6690042075736325, 0.6680691912108462, 0.6680691912108462, 0.6661991584852734, 0.6680691912108462, 0.6699392239364189, 0.6708742402992053, 0.6699392239364189, 0.6727442730247779, 0.667601683029453, 0.6690042075736325, 0.6680691912108462, 0.6708742402992053, 0.6699392239364189, 0.6704067321178121, 0.6694717157550257, 0.6708742402992053, 0.6704067321178121, 0.6713417484805985, 0.6713417484805985, 0.6694717157550257, 0.6708742402992053, 0.6732117812061711, 0.6722767648433847, 0.6736792893875643, 0.6722767648433847, 0.6732117812061711, 0.6727442730247779, 0.6722767648433847, 0.6736792893875643, 0.6736792893875643, 0.6732117812061711, 0.6746143057503506, 0.6741467975689575, 0.675549322113137, 0.6774193548387096, 0.6764843384759234, 0.6764843384759234, 0.6764843384759234, 0.6774193548387096, 0.6764843384759234, 0.6774193548387096, 0.6760168302945302, 0.6736792893875643, 0.6746143057503506, 0.6746143057503506, 0.6741467975689575, 0.6750818139317438]
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[[479 462] [233 965]] precision recall f1-score support 0 0.67 0.51 0.58 941 1 0.68 0.81 0.74 1198 accuracy 0.68 2139 macro avg 0.67 0.66 0.66 2139 weighted avg 0.67 0.68 0.67 2139
plt.figure(figsize=(50,20))
plt.plot(k_range, scores)
plt.axis([1,25,min(scores),1])
plt.xticks(k_range)
plt.yticks(fontsize=30)
plt.xlabel('Value of K',fontsize=35)
plt.ylabel('Test accuracy',fontsize=35)
plt.xlim(0, 300, 10)
plt.ylim(0.64, .72)
plt.show()
plt.figure(figsize=(12, 6))
plt.plot(range(1, 300), error, color='red', linestyle='dashed', marker='o',
markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
Text(0, 0.5, 'Mean Error')