import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
import random
import matplotlib.pyplot as plt
%matplotlib inline
Train = pd.read_csv('/home/student/SPECT.train', header=None)
Test = pd.read_csv('/home/student/SPECT.test' ,header=None)
frames = [Train, Test]
Data = np.array(pd.concat(frames).values)
Y,X = Data[:,0],Data[:,1:]
x_train, x_test, y_train, y_test = train_test_split(X,Y,train_size=0.9,test_size=0.1)
x_train.shape, x_test, y_train, y_test
((240, 22), array([[1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0], [1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1], [1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1], [1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0], [1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1], [1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0]]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1]), array([0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]))
input_array = x_train
weights = [random.random() for x in range(22)]
s = sum(weights)
n_weights = [x/s for x in weights]
weights = n_weights
threshold = 0.5
actual_result = 1
learning_rate = 0.5
print(weights)
[0.03385511869397899, 0.06878489913730873, 0.07029712556220485, 0.0634493722610696, 0.06799944392105774, 0.07949590984370473, 0.04644816506348176, 0.055354324283170935, 0.067042440358336, 0.07758983139760595, 0.06355426822930273, 0.039611520541607596, 0.06814338785741138, 0.026586052976336465, 0.010372187070941386, 0.055822484955270125, 0.010100802025692903, 0.025179590172097695, 0.000509769752051636, 0.023711010123725288, 0.04436187929950815, 0.0017304164741353504]
def percept(input_array,y_train,weights,lr):
n = len(input_array)
m = len(input_array[0])
error_flag = 0
for i in range(n):
actual_result = y_train[i]
inputs = input_array[i]
pred_op = sum([x*w for x,w in zip(inputs,weights)])
if pred_op > threshold:
OP = 1
else:
OP = 0
pred_op = OP
error = actual_result - pred_op
if error > 0:
error_flag = 1
for j in range(m):
weights[j] = weights[j] + lr*(actual_result-pred_op)*inputs[j]
#pred_op = sum([x*w for x,w in zip(inputs,weigts)])
return error_flag, weights
def Predict(weights,vector,my_res):
t = len(vector)
for it in range(t):
if np.array(vector[it]).dot(np.array(weights)) > threshold:
int_dot = 1
else:
int_dot = 0
my_res = np.append(my_res,int_dot)
return my_res
error_flag = 1
weights_ = np.array([])
acc = np.array([])
for threshold in np.arange(0.3,0.7,0.1):
accuracy = np.array([])
for learning_rate in np.arange(0.1,1,0.1):
my_res = np.array([])
#print(learning_rate)
it = 0
while it < 500:
error_flag_,weights_ = percept(input_array,y_train,weights,learning_rate)
#if it == 499:
#print(weights)
it += 1
my_res = Predict(weights_,x_test,my_res)
#print(len(my_res),len(y_test))
accuracy = np.append(accuracy,sk.metrics.accuracy_score(y_test,my_res,normalize=True,sample_weight=None))
#print(accuracy.shape)
acc = np.concatenate((acc, accuracy), axis=0)
#print(weights)
print(acc)
acc.shape
[ 0.88888889 0.81481481 0.77777778 0.85185185 0.81481481 0.81481481 0.81481481 0.81481481 0.81481481 0.85185185 0.81481481 0.77777778 0.81481481 0.77777778 0.77777778 0.81481481 0.85185185 0.81481481 0.77777778 0.81481481 0.81481481 0.74074074 0.77777778 0.85185185 0.77777778 0.77777778 0.81481481 0.85185185 0.88888889 0.85185185 0.81481481 0.81481481 0.81481481 0.77777778 0.77777778 0.81481481]
(36,)
acc_th = np.reshape(acc,(-1,9))
acc_th
array([[ 0.88888889, 0.81481481, 0.77777778, 0.85185185, 0.81481481, 0.81481481, 0.81481481, 0.81481481, 0.81481481], [ 0.85185185, 0.81481481, 0.77777778, 0.81481481, 0.77777778, 0.77777778, 0.81481481, 0.85185185, 0.81481481], [ 0.77777778, 0.81481481, 0.81481481, 0.74074074, 0.77777778, 0.85185185, 0.77777778, 0.77777778, 0.81481481], [ 0.85185185, 0.88888889, 0.85185185, 0.81481481, 0.81481481, 0.81481481, 0.77777778, 0.77777778, 0.81481481]])
thresholds = [0.3,0.4,0.5,0.6]
print('max accuracy =',max(acc),'with threshold =',0.4,'and learning rate =',0.8)
max accuracy = 0.888888888889 with threshold = 0.4 and learning rate = 0.8
x_axis = np.arange(0.1,1,0.1)
plt.gca().set_prop_cycle('color',['red','green','blue','yellow'])
plt.plot(x_axis,acc_th[0])
plt.plot(x_axis,acc_th[1])
plt.plot(x_axis,acc_th[2])
plt.plot(x_axis,acc_th[3])
plt.legend(['th=0.3','th=0.4','th=0.5','th=0.6'], loc='lower right')
plt.ylabel('accuracy')
plt.xlabel('learning rate')
plt.show()
kf = KFold(267,n_folds=10)
acc = 0
totcmat = np.zeros((2,2))
totacc = 0
totpre = 0
totrec = 0
error_flag = 1
weights_ = np.array([])
weights = [random.random() for x in range(22)]
s = sum(weights)
n_weights = [x/s for x in weights]
weights = n_weights
for train_index, test_index in kf:
my_res = np.array([])
#print("TRAIN:", train_index, "TEST:", test_index)
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = Y[train_index], Y[test_index]
it = 0
while it < 500:
error_flag_,weights_ = percept(X_train,y_train,weights,learning_rate)
#if it == 499:
#print(weights)
it += 1
my_res = Predict(weights_,X_test,my_res)
cmat = sk.metrics.confusion_matrix(y_test,my_res,[1,0])
totacc += (cmat[0][0]+cmat[1][1])/np.sum(cmat)
totcmat += cmat
totpre += (cmat[0][0])/(cmat[0][0]+cmat[0][1])
totrec += (cmat[0][0])/(cmat[0][0]+cmat[1][0])
acc = acc + np.sum(my_res == y_test)/len(my_res)
avgacc = totacc/10
avgcmat = totcmat/10
avgpre = totpre/10
avgrec = totrec/10
sk.metrics.confusion_matrix(y_test,my_res)
print(avgacc)
print(avgcmat)
print(avgpre)
print(avgrec)
0.568376068376 [[ 11.4 9.8] [ 1.7 3.8]] 0.502874902875 0.808823529412