KNN with Iris

Libraries

In [1]:
import pandas as pd
import seaborn as sns
import sklearn.model_selection as mod
import sklearn.neighbors as nei

Data

In [2]:
# Fetch the Iris CSV over HTTPS and parse it into a DataFrame.
iris_url = "https://github.com/ianmcloughlin/datasets/raw/master/iris.csv"
df = pd.read_csv(iris_url)
In [3]:
# Display the loaded data frame (the notebook truncates the middle rows).
df
Out[3]:
sepal_length sepal_width petal_length petal_width class
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns

Visualise

In [4]:
# Plot every pair of measurement columns against each other, colouring the
# points by the "class" column. Uses `sns` from the notebook's import cell.
sns.pairplot(df, hue="class")
Out[4]:
<seaborn.axisgrid.PairGrid at 0x1e0de7d0988>

Inputs and outputs

In [5]:
# Features: the four measurement columns, kept as a DataFrame.
inputs = df.loc[:, ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
# Target: the class label of each row, as a Series.
outputs = df.loc[:, 'class']

Classifier

In [6]:
# Create a k-nearest-neighbours classifier that considers the 5 nearest samples.
knn = nei.KNeighborsClassifier(n_neighbors=5)

Fit

In [7]:
# Fit the classifier on the complete data set (all rows of inputs/outputs).
knn.fit(inputs, outputs)
Out[7]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

Predict

In [8]:
# Look up the row labelled 121 to use as a sample for a manual prediction.
df.loc[121]
Out[8]:
sepal_length          5.6
sepal_width           2.8
petal_length          4.9
petal_width             2
class           virginica
Name: 121, dtype: object
In [9]:
# Predict the class for one sample, given in the same column order as `inputs`:
# sepal_length, sepal_width, petal_length, petal_width (the values of row 121).
# NOTE(review): the model was fitted on a DataFrame; newer scikit-learn releases
# may warn about missing feature names for a plain list - confirm on upgrade.
knn.predict([[5.6, 2.8, 4.9, 2.0]])
Out[9]:
array(['virginica'], dtype=object)

Evaluate

In [10]:
# Count how many rows are predicted correctly. The model was fitted on these
# same rows, so this is a training-set count, not an estimate of how well the
# model generalises to unseen data.
(knn.predict(inputs) == outputs).sum()
Out[10]:
145
In [11]:
# Hold out 33% of the rows for testing. The split is shuffled, so the seed is
# fixed to make the split, and any score computed from it, reproducible on
# every Restart & Run All.
inputs_train, inputs_test, outputs_train, outputs_test = mod.train_test_split(
    inputs, outputs, test_size=0.33, random_state=0
)
In [12]:
# Start from a fresh classifier (replacing the one fitted on all rows) and fit
# it on the training split only, so the test rows stay unseen.
knn = nei.KNeighborsClassifier(n_neighbors=5)
knn.fit(inputs_train, outputs_train)
Out[12]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')
In [13]:
# Count the correctly classified rows in the held-out test split. This is a
# raw count; divide by len(outputs_test) to get an accuracy.
(knn.predict(inputs_test) == outputs_test).sum()
Out[13]:
48

End