from google.colab import drive
drive.mount('/data/')
data_dir = '/data/My Drive/Colab Notebooks/Experiment'
!ls '/data/My Drive/Colab Notebooks/Experiment'
!pip install matplotlib
Mounted at /data/
Iris.csv  m_data.csv  w_data.csv
Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2)
import pandas as pd
df = pd.read_csv(data_dir+'/Iris.csv')
df.head()
|   | Id | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm | Species |
|---|----|---------------|--------------|---------------|--------------|---------|
| 0 | 1  | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 1 | 2  | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 2 | 3  | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 3 | 4  | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 4 | 5  | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Id             150 non-null    int64
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB
df['Species'].value_counts()
Iris-setosa        50
Iris-virginica     50
Iris-versicolor    50
Name: Species, dtype: int64
df.drop('Id', axis=1, inplace=True)
df
|     | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm | Species |
|-----|---------------|--------------|---------------|--------------|---------|
| 0   | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 1   | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 2   | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 3   | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 4   | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | Iris-virginica |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | Iris-virginica |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | Iris-virginica |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | Iris-virginica |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | Iris-virginica |

150 rows × 5 columns
figure = df[df['Species']=='Iris-setosa'].plot.scatter(x='SepalLengthCm', y='SepalWidthCm', color='red', label='Setosa')
df[df['Species']=='Iris-versicolor'].plot.scatter(x='SepalLengthCm', y='SepalWidthCm', color='blue', label='Versicolor', ax=figure)
df[df['Species']=='Iris-virginica'].plot.scatter(x='SepalLengthCm', y='SepalWidthCm', color='green', label='Virginica', ax=figure)
figure.set_xlabel("Sepal Length")
figure.set_ylabel("Sepal Width")
figure.set_title("Sepal Length vs. Sepal Width")
figure=plt.gcf()
figure.set_size_inches(10, 6)
plt.show()
figure = df[df['Species']=='Iris-setosa'].plot.scatter(x='PetalLengthCm', y='PetalWidthCm', color='red', label='Setosa')
df[df['Species']=='Iris-versicolor'].plot.scatter(x='PetalLengthCm', y='PetalWidthCm', color='blue', label='Versicolor', ax=figure)
df[df['Species']=='Iris-virginica'].plot.scatter(x='PetalLengthCm', y='PetalWidthCm', color='green', label='Virginica', ax=figure)
figure.set_xlabel("Petal Length")
figure.set_ylabel("Petal Width")
figure.set_title("Petal Length vs. Petal Width")
figure=plt.gcf()
figure.set_size_inches(10, 6)
plt.show()
df.hist(edgecolor='green', linewidth=1.2)
figure=plt.gcf()
figure.set_size_inches(12, 6)
plt.show()
plt.figure(figsize=(12,6))
plt.subplot(2,2,1)
sns.violinplot(x='Species',y='SepalLengthCm',data=df)
plt.subplot(2,2,2)
sns.violinplot(x='Species',y='SepalWidthCm',data=df)
plt.subplot(2,2,3)
sns.violinplot(x='Species',y='PetalLengthCm',data=df)
plt.subplot(2,2,4)
sns.violinplot(x='Species',y='PetalWidthCm',data=df)
plt.show()
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
df.shape
(150, 5)
plt.figure(figsize=(7,4))
sns.heatmap(df.corr(),annot=True,cmap='cubehelix_r')
plt.show()
# Note: without a fixed random_state, this split (and the accuracies reported below) will vary between runs.
train, test = train_test_split(df, test_size=0.3)
print(train.shape)
print(test.shape)
(105, 5)
(45, 5)
train_x = train[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
train_y=train['Species']
test_x= test[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
test_y =test['Species']
train_x.head()
|     | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm |
|-----|---------------|--------------|---------------|--------------|
| 74  | 6.4 | 2.9 | 4.3 | 1.3 |
| 129 | 7.2 | 3.0 | 5.8 | 1.6 |
| 16  | 5.4 | 3.9 | 1.3 | 0.4 |
| 124 | 6.7 | 3.3 | 5.7 | 2.1 |
| 19  | 5.1 | 3.8 | 1.5 | 0.3 |
train_y.head()
74     Iris-versicolor
129     Iris-virginica
16         Iris-setosa
124     Iris-virginica
19         Iris-setosa
Name: Species, dtype: object
model = svm.SVC()
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of the SVM is:', metrics.accuracy_score(prediction, test_y))
The accuracy of the SVM is: 0.9555555555555556
model = LogisticRegression()
model.fit(train_x,train_y)
prediction=model.predict(test_x)
print('The accuracy of the Logistic Regression is',metrics.accuracy_score(prediction,test_y))
The accuracy of the Logistic Regression is 0.9555555555555556
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
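The warning means lbfgs did not fully converge within the default 100 iterations; the accuracy above is still usable, but the fit is not clean. A minimal sketch of the two remedies the warning itself suggests, raising max_iter or standardizing the features first (the value 1000 is an arbitrary illustrative choice, not from the original notebook):

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Option 1: allow the solver more iterations.
model = LogisticRegression(max_iter=1000)
model.fit(train_x, train_y)

# Option 2: standardize the features so the solver converges faster.
model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(train_x, train_y)
print('Accuracy with scaled features:', metrics.accuracy_score(model.predict(test_x), test_y))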
model = DecisionTreeClassifier()
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of Decision Tree is:', metrics.accuracy_score(prediction, test_y))
The accuracy of Decision Tree is: 0.9555555555555556
model = KNeighborsClassifier(n_neighbors=3)
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of the KNeighbors is:', metrics.accuracy_score(prediction, test_y))
The accuracy of the KNeighbors is: 0.9777777777777777
# Evaluate KNN accuracy for k = 1 through 10
a_index = list(range(1, 11))
accuracies = []
for i in a_index:
    model = KNeighborsClassifier(n_neighbors=i)
    model.fit(train_x, train_y)
    prediction = model.predict(test_x)
    accuracies.append(metrics.accuracy_score(prediction, test_y))
plt.plot(a_index, accuracies)
plt.xticks(a_index)
plt.show()
petal=df[['PetalLengthCm','PetalWidthCm','Species']]
sepal=df[['SepalLengthCm','SepalWidthCm','Species']]
train_p,test_p=train_test_split(petal,test_size=0.3,random_state=0)
train_x_p=train_p[['PetalWidthCm','PetalLengthCm']]
train_y_p=train_p.Species
test_x_p=test_p[['PetalWidthCm','PetalLengthCm']]
test_y_p=test_p.Species
train_s,test_s=train_test_split(sepal,test_size=0.3,random_state=0)
train_x_s=train_s[['SepalWidthCm','SepalLengthCm']]
train_y_s=train_s.Species
test_x_s=test_s[['SepalWidthCm','SepalLengthCm']]
test_y_s=test_s.Species
model=svm.SVC()
model.fit(train_x_p,train_y_p)
prediction=model.predict(test_x_p)
print('The accuracy of the SVM using Petals is:',metrics.accuracy_score(prediction,test_y_p))
model=svm.SVC()
model.fit(train_x_s,train_y_s)
prediction=model.predict(test_x_s)
print('The accuracy of the SVM using Sepals is:',metrics.accuracy_score(prediction,test_y_s))
The accuracy of the SVM using Petals is: 0.9777777777777777
The accuracy of the SVM using Sepals is: 0.8
model = LogisticRegression()
model.fit(train_x_p,train_y_p)
prediction=model.predict(test_x_p)
print('The accuracy of the Logistic Regression using Petals is:',metrics.accuracy_score(prediction,test_y_p))
model.fit(train_x_s,train_y_s)
prediction=model.predict(test_x_s)
print('The accuracy of the Logistic Regression using Sepals is:',metrics.accuracy_score(prediction,test_y_s))
The accuracy of the Logistic Regression using Petals is: 0.9777777777777777
The accuracy of the Logistic Regression using Sepals is: 0.8222222222222222
model=DecisionTreeClassifier()
model.fit(train_x_p,train_y_p)
prediction=model.predict(test_x_p)
print('The accuracy of the Decision Tree using Petals is:',metrics.accuracy_score(prediction,test_y_p))
model.fit(train_x_s,train_y_s)
prediction=model.predict(test_x_s)
print('The accuracy of the Decision Tree using Sepals is:',metrics.accuracy_score(prediction,test_y_s))
The accuracy of the Decision Tree using Petals is: 0.9555555555555556
The accuracy of the Decision Tree using Sepals is: 0.6666666666666666
model=KNeighborsClassifier(n_neighbors=3)
model.fit(train_x_p,train_y_p)
prediction=model.predict(test_x_p)
print('The accuracy of the KNN using Petals is:',metrics.accuracy_score(prediction,test_y_p))
model.fit(train_x_s,train_y_s)
prediction=model.predict(test_x_s)
print('The accuracy of the KNN using Sepals is:',metrics.accuracy_score(prediction,test_y_s))
The accuracy of the KNN using Petals is: 0.9777777777777777
The accuracy of the KNN using Sepals is: 0.7333333333333333
Across all four classifiers, training on the petal measurements gives noticeably higher accuracy than training on the sepal measurements.
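To make that comparison easier to read, the petal and sepal accuracies above can be collected into one table. A minimal sketch reusing the splits and classifiers already defined; the helper name `evaluate`, the max_iter value, and the table layout are my own additions, not part of the original notebook:

def evaluate(model, tr_x, tr_y, te_x, te_y):
    # Fit on the training split and return accuracy on the test split.
    model.fit(tr_x, tr_y)
    return metrics.accuracy_score(model.predict(te_x), te_y)

models = {
    'SVM': svm.SVC(),
    'Logistic Regression': LogisticRegression(max_iter=1000),  # higher max_iter to avoid the earlier convergence warning
    'Decision Tree': DecisionTreeClassifier(),
    'KNN (k=3)': KNeighborsClassifier(n_neighbors=3),
}

summary = pd.DataFrame({
    name: {
        'Petals': evaluate(m, train_x_p, train_y_p, test_x_p, test_y_p),
        'Sepals': evaluate(m, train_x_s, train_y_s, test_x_s, test_y_s),
    }
    for name, m in models.items()
}).T  # rows = models, columns = feature sets

print(summary)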