# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from sklearn.linear_model import LogisticRegression
# Generate synthetic data: class 0 centered at (2, 2), class 1 centered at (3, 3)
x0 = np.random.randn(100) + 2
x1 = np.random.randn(100) + 2
x0_ = np.random.randn(100) + 3
x1_ = np.random.randn(100) + 3
xx0 = np.concatenate((x0,x0_))
xx1 = np.concatenate((x1,x1_))
y = np.concatenate((np.zeros(100) ,np.ones(100)))
d = {'x0': xx0, "x1":xx1, "y":y}
data = pd.DataFrame(d)
data.head(3)
|   | x0       | x1       | y   |
|---|----------|----------|-----|
| 0 | 0.919731 | 1.550680 | 0.0 |
| 1 | 1.642133 | 0.356495 | 0.0 |
| 2 | 3.983291 | 1.786487 | 0.0 |
data.shape
(200, 3)
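A quick sanity check (not in the original notebook) is to count rows per class; with 100 points generated for each label the split should be exactly 50/50:
# Sanity check: count rows per class label (expect 100 of each)
print(data['y'].value_counts())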
# Color points by class: blue for class 0, red for class 1
c = ['b', 'r']
mycolors = [c[0] if i == 0 else c[1] for i in y]
data.plot.scatter('x0', 'x1' ,c=mycolors)
(Output: scatter plot of x1 against x0, with class 0 in blue and class 1 in red.)
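For readers who prefer calling matplotlib directly, an equivalent plot can be built by masking on the label column; this is just an illustrative sketch assuming the `data` DataFrame defined above:
# Equivalent scatter plot built directly with matplotlib (illustrative sketch)
mask = data['y'] == 0
plt.scatter(data.loc[mask, 'x0'], data.loc[mask, 'x1'], c='b', label='class 0')
plt.scatter(data.loc[~mask, 'x0'], data.loc[~mask, 'x1'], c='r', label='class 1')
plt.xlabel('x0')
plt.ylabel('x1')
plt.legend()
plt.show()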
# train test split
from sklearn.model_selection import train_test_split
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
# Split the dataset into train and test sets (75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/4, random_state=0)
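It is worth confirming the split sizes; with test_size = 1/4 and 200 rows, the training set should hold 150 rows and the test set 50 (a quick check, not in the original notebook):
# Verify the 75/25 split: expect (150, 2) (50, 2) (150,) (50,)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)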
from sklearn.linear_model import LogisticRegression
# Apply a machine learning algorithm: logistic regression
model = LogisticRegression()
# learn parameters with fit
model.fit(X_train, y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
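Once fitted, the learned decision boundary can be inspected through the model's attributes; a small sketch (the exact values will vary with the random data):
# Inspect the fitted parameters of the logistic regression model
w = model.coef_[0]          # one weight per feature (x0, x1)
b = model.intercept_[0]
print("weights:", w, "intercept:", b)
# The decision boundary is the line w[0]*x0 + w[1]*x1 + b = 0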
# prediction
y_pred = model.predict(X_test)
y_pred
array([0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1.])
y_test
array([0., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1.])
#Evaluation
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
0.76
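Beyond overall accuracy, scikit-learn's classification_report summarizes precision, recall, and F1 per class; a minimal sketch using the same predictions:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 for the logistic regression predictions
print(classification_report(y_test, y_pred))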
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)
array([[17,  6],
       [ 6, 21]])
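Reading the confusion matrix: rows are true classes and columns are predictions, so 17 class-0 and 21 class-1 points were classified correctly, with 6 errors in each direction. The accuracy can be recomputed from it directly (a small check, consistent with the 0.76 above):
cm = confusion_matrix(y_test, y_pred)
# Accuracy = correctly classified (diagonal) / all test points = (17 + 21) / 50
print(cm.trace() / cm.sum())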
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
gaussian = GaussianNB()
gaussian.fit(X_train, y_train)
y_pred = gaussian.predict(X_test)
acc_gaussian = round(accuracy_score(y_pred, y_test) * 100, 2)
print(acc_gaussian)
72.0
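Gaussian Naive Bayes fits a per-class Gaussian to each feature, so the estimated class means should land near the true centers (2, 2) and (3, 3); a quick inspection sketch using the fitted model:
# Per-class feature means estimated by GaussianNB (expected near (2, 2) and (3, 3))
print(gaussian.theta_)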
# Gradient Boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier
gbk = GradientBoostingClassifier()
gbk.fit(X_train, y_train)
y_pred = gbk.predict(X_test)
acc_gbk = round(accuracy_score(y_pred, y_test) * 100, 2)
print(acc_gbk)
76.0
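Finally, the three test accuracies can be gathered into one small table for comparison; this sketch reuses the fitted models and scores from above, and acc_logreg is just an illustrative name for the logistic regression score re-expressed as a percentage:
# Compare the three classifiers on the same held-out test set
acc_logreg = round(accuracy_score(y_test, model.predict(X_test)) * 100, 2)  # hypothetical helper variable
scores = pd.DataFrame({
    'model': ['Logistic Regression', 'Gaussian Naive Bayes', 'Gradient Boosting'],
    'test accuracy (%)': [acc_logreg, acc_gaussian, acc_gbk],
})
print(scores)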