# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from sklearn.linear_model import LogisticRegression
# Generate synthetic data: class 0 centered at (2, 2), class 1 centered at (3, 3)
x0 = np.random.randn(100) + 2
x1 = np.random.randn(100) + 2
x0_ = np.random.randn(100) + 3
x1_ = np.random.randn(100) + 3
xx0 = np.concatenate((x0,x0_))
xx1 = np.concatenate((x1,x1_))
y = np.concatenate((np.zeros(100) ,np.ones(100)))
d = {'x0': xx0, "x1":xx1, "y":y}
data = pd.DataFrame(d)
data.head(3)
|   | x0       | x1       | y   |
|---|----------|----------|-----|
| 0 | 0.919731 | 1.550680 | 0.0 |
| 1 | 1.642133 | 0.356495 | 0.0 |
| 2 | 3.983291 | 1.786487 | 0.0 |
data.shape
(200, 3)
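A quick sanity check (not in the original notebook) is to count rows per class; with 100 points generated for each label the split should be exactly 50/50:
# Sanity check: count rows per class label (expect 100 of each)
print(data['y'].value_counts())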
# Color points by class: blue for class 0, red for class 1
c = ['b', 'r']
mycolors = [c[0] if i == 0 else c[1] for i in y]
data.plot.scatter('x0', 'x1' ,c=mycolors)
(Output: scatter plot of x1 against x0, with class 0 in blue and class 1 in red.)
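For readers who prefer calling matplotlib directly, an equivalent plot can be built by masking on the label column; this is just an illustrative sketch assuming the `data` DataFrame defined above:
# Equivalent scatter plot built directly with matplotlib (illustrative sketch)
mask = data['y'] == 0
plt.scatter(data.loc[mask, 'x0'], data.loc[mask, 'x1'], c='b', label='class 0')
plt.scatter(data.loc[~mask, 'x0'], data.loc[~mask, 'x1'], c='r', label='class 1')
plt.xlabel('x0')
plt.ylabel('x1')
plt.legend()
plt.show()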
# train test split
from sklearn.model_selection import train_test_split
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
# Split the dataset into train and test sets (75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/4, random_state=0)
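It is worth confirming the split sizes; with test_size = 1/4 and 200 rows, the training set should hold 150 rows and the test set 50 (a quick check, not in the original notebook):
# Verify the 75/25 split: expect (150, 2) (50, 2) (150,) (50,)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)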
from sklearn.linear_model import LogisticRegression
# Apply a machine learning algorithm: logistic regression
model = LogisticRegression()
# learn parameters with fit
model.fit(X_train, y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
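Once fitted, the learned decision boundary can be inspected through the model's attributes; a small sketch (the exact values will vary with the random data):
# Inspect the fitted parameters of the logistic regression model
w = model.coef_[0]          # one weight per feature (x0, x1)
b = model.intercept_[0]
print("weights:", w, "intercept:", b)
# The decision boundary is the line w[0]*x0 + w[1]*x1 + b = 0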
# prediction
y_pred = model.predict(X_test)
y_pred
array([0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1.])
y_test
array([0., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1.])
#Evaluation
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
0.76
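Beyond overall accuracy, scikit-learn's classification_report summarizes precision, recall, and F1 per class; a minimal sketch using the same predictions:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 for the logistic regression predictions
print(classification_report(y_test, y_pred))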
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)
array([[17,  6],
       [ 6, 21]])
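Reading the confusion matrix: rows are true classes and columns are predictions, so 17 class-0 and 21 class-1 points were classified correctly, with 6 errors in each direction. The accuracy can be recomputed from it directly (a small check, consistent with the 0.76 above):
cm = confusion_matrix(y_test, y_pred)
# Accuracy = correctly classified (diagonal) / all test points = (17 + 21) / 50
print(cm.trace() / cm.sum())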
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
gaussian = GaussianNB()
gaussian.fit(X_train, y_train)
y_pred = gaussian.predict(X_test)
acc_gaussian = round(accuracy_score(y_pred, y_test) * 100, 2)
print(acc_gaussian)
72.0
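Gaussian Naive Bayes fits a per-class Gaussian to each feature, so the estimated class means should land near the true centers (2, 2) and (3, 3); a quick inspection sketch using the fitted model:
# Per-class feature means estimated by GaussianNB (expected near (2, 2) and (3, 3))
print(gaussian.theta_)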
# Gradient Boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier
gbk = GradientBoostingClassifier()
gbk.fit(X_train, y_train)
y_pred = gbk.predict(X_test)
acc_gbk = round(accuracy_score(y_pred, y_test) * 100, 2)
print(acc_gbk)
76.0
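Finally, the three test accuracies can be gathered into one small table for comparison; this sketch reuses the fitted models and scores from above, and acc_logreg is just an illustrative name for the logistic regression score re-expressed as a percentage:
# Compare the three classifiers on the same held-out test set
acc_logreg = round(accuracy_score(y_test, model.predict(X_test)) * 100, 2)  # hypothetical helper variable
scores = pd.DataFrame({
    'model': ['Logistic Regression', 'Gaussian Naive Bayes', 'Gradient Boosting'],
    'test accuracy (%)': [acc_logreg, acc_gaussian, acc_gbk],
})
print(scores)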