import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
import random
import math
import matplotlib.pyplot as plt
%matplotlib inline
Train = pd.read_csv('/home/student/SPECT.train', header=None)
Test = pd.read_csv('/home/student/SPECT.test' ,header=None)
frames = [Train, Test]
Data = np.array(pd.concat(frames).values)
Y,X = Data[:,0],Data[:,1:]
x_train, x_test, y_train, y_test = train_test_split(X,Y,train_size=0.9,test_size=0.1)
x_train.shape, x_test, y_train, y_test
((240, 22), array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0], [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1], [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1], [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1], [1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0], [0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0], [1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]), array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]), array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]))
n = len(x_train)
m = len(x_train[0])
threshold = 0.85
learning_rate = 0.5
weights = np.random.random_sample((28,28))
bias = np.ones([28,1])
bias = (1/23)*bias
error = np.empty([28,1])
my_res = np.array([])
def Predict(weights,vector,my_res):
t = len(vector)
u = len(vector[0])
for i in range(t):
I_test = np.array([])
O_test = np.array([])
#First Layer
inputs = vector[i]
for j in range(u):
I_test = np.append(I_test,inputs[j])
O_test = np.append(O_test,inputs[j])
#layer 2
#Inputs
for j in range(5):
input_add = sum([weights[i,22+j]*O_test[i] for i in range(22)])
input_add = input_add + bias[22+j]
I_test = np.append(I_test,input_add)
#Outputs
for j in range(5):
output_add = 1/(1 + (math.exp(-I_test[22+j])))
O_test = np.append(O_test,output_add)
#Output Layer
input_add = sum([weights[i,27]*O_test[i] for i in range(22,27)])
input_add = input_add + bias[27]
I_test = np.append(I_test,input_add)
output_add = 1/(1 + (math.exp(-I_test[27])))
if output_add < threshold:
O_test = np.append(O_test,0)
else:
O_test = np.append(O_test,1)
my_res = np.append(my_res,O_test[27])
return my_res
n = len(x_train)
m = len(x_train[0])
acc = np.array([])
for threshold in np.arange(0.8,0.95,0.05):
accuracy = np.array([])
for learning_rate in np.arange(0.1,1,0.1):
my_res = np.array([])
it = 0
while it < 100:
for i in range(n):
I = np.array([])
O = np.array([])
#forward pass
#calculating net input and output for each node
#layer 1
inputs = x_train[i]
for j in range(m):
I = np.append(I,inputs[j])
O = np.append(O,inputs[j])
#layer 2
#Inputs
for j in range(5):
input_add = sum([weights[i,22+j]*O[i] for i in range(22)])
input_add = input_add + bias[22+j]
I = np.append(I,input_add)
#Outputs
for j in range(5):
output_add = 1/(1 + (math.exp(-I[22+j])))
O = np.append(O,output_add)
#Output Layer
input_add = sum([weights[i,27]*O[i] for i in range(22,27)])
input_add = input_add + bias[27]
I = np.append(I,input_add)
output_add = 1/(1 + (math.exp(-I[27])))
if output_add < threshold:
O = np.append(O,0)
else:
O = np.append(O,1)
#backward pass
#Output Layer
error[27] = O[27]*(1-O[27])*(y_train[i]-O[27])
bias[27] = bias[27] + learning_rate*error[27]
#Hidden Layer
for j in range(22,27):
error[j] = O[j]*(1-O[j])*(error[27]*weights[j,27])
bias[j] = bias[j] + learning_rate*error[j]
weights[j,27] = weights[j,27] + learning_rate*error[27]*O[j]
#First layer
for j in range(22):
error[j] = O[j]*(1-O[j])*sum([error[k]*weights[j,k] for k in range(22,27)])
bias[j] = bias[j] + learning_rate*error[j]
for l in range(22,27):
weights[j,l] = weights[j,l] + learning_rate * error[l]*O[j]
it += 1
my_res = Predict(weights,x_test,my_res)
accuracy = np.append(accuracy,sk.metrics.accuracy_score(y_test,my_res,normalize=True,sample_weight=None))
acc = np.concatenate((acc, accuracy), axis=0)
print(acc)
acc.shape
[ 0.85185185 0.85185185 0.85185185 0.85185185 0.85185185 0.85185185 0.85185185 0.85185185 0.85185185 0.88888889 0.88888889 0.88888889 0.88888889 0.88888889 0.88888889 0.88888889 0.88888889 0.88888889 0.18518519 0.18518519 0.18518519 0.18518519 0.18518519 0.18518519 0.18518519 0.18518519 0.18518519]
(27,)
acc_th = np.reshape(acc,(-1,9))
acc_th
array([[ 0.85185185, 0.85185185, 0.85185185, 0.85185185, 0.85185185, 0.85185185, 0.85185185, 0.85185185, 0.85185185], [ 0.88888889, 0.88888889, 0.88888889, 0.88888889, 0.88888889, 0.88888889, 0.88888889, 0.88888889, 0.88888889], [ 0.18518519, 0.18518519, 0.18518519, 0.18518519, 0.18518519, 0.18518519, 0.18518519, 0.18518519, 0.18518519]])
thresholds = [0.3,0.4,0.5,0.6]
print('max accuracy =',max(acc),'with threshold =',0.85,'and learning rate =',0.5)
max accuracy = 0.888888888889 with threshold = 0.85 and learning rate = 0.5
x_axis = np.arange(0.1,1,0.1)
plt.gca().set_prop_cycle('color',['red','green','blue','yellow'])
plt.plot(x_axis,acc_th[0])
plt.plot(x_axis,acc_th[1])
plt.plot(x_axis,acc_th[2])
plt.ylabel('accuracy')
plt.xlabel('learning rate')
plt.legend(['th=0.3','th=0.4','th=0.5','th=0.6'], loc='lower right')
plt.show()
kf = KFold(267,n_folds=10)
n = len(x_train)
m = len(x_train[0])
threshold = 0.85
learning_rate = 0.5
my_res = np.array([])
weights = np.random.random_sample((28,28))
acc = 0
totcmat = np.zeros((2,2))
totacc = 0
totpre = 0
totrec = 0
for train_index, test_index in kf:
my_res = np.array([])
#print("TRAIN:", train_index, "TEST:", test_index)
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = Y[train_index], Y[test_index]
it = 0
while it < 100:
for i in range(n):
I = np.array([])
O = np.array([])
#forward pass
#calculating net input and output for each node
#layer 1
inputs = x_train[i]
for j in range(m):
I = np.append(I,inputs[j])
O = np.append(O,inputs[j])
#layer 2
#Inputs
for j in range(5):
input_add = sum([weights[i,22+j]*O[i] for i in range(22)])
input_add = input_add + bias[22+j]
I = np.append(I,input_add)
#Outputs
for j in range(5):
output_add = 1/(1 + (math.exp(-I[22+j])))
O = np.append(O,output_add)
#Output Layer
input_add = sum([weights[i,27]*O[i] for i in range(22,27)])
input_add = input_add + bias[27]
I = np.append(I,input_add)
output_add = 1/(1 + (math.exp(-I[27])))
if output_add < threshold:
O = np.append(O,0)
else:
O = np.append(O,1)
#backward pass
#Output Layer
error[27] = O[27]*(1-O[27])*(y_train[i]-O[27])
bias[27] = bias[27] + learning_rate*error[27]
#Hidden Layer
for j in range(22,27):
error[j] = O[j]*(1-O[j])*(error[27]*weights[j,27])
bias[j] = bias[j] + learning_rate*error[j]
weights[j,27] = weights[j,27] + learning_rate*error[27]*O[j]
#First layer
for j in range(22):
error[j] = O[j]*(1-O[j])*sum([error[k]*weights[j,k] for k in range(22,27)])
bias[j] = bias[j] + learning_rate*error[j]
for l in range(22,27):
weights[j,l] = weights[j,l] + learning_rate * error[l]*O[j]
it += 1
my_res = Predict(weights,x_test,my_res)
accuracy = np.append(accuracy,sk.metrics.accuracy_score(y_test,my_res,normalize=True,sample_weight=None))
my_res = Predict(weights_,X_test,my_res)
cmat = sk.metrics.confusion_matrix(y_test,my_res,[1,0])
totacc += (cmat[0][0]+cmat[1][1])/np.sum(cmat)
totcmat += cmat
totpre += (cmat[0][0])/(cmat[0][0]+cmat[0][1])
totrec += (cmat[0][0])/(cmat[0][0]+cmat[1][0])
acc = acc + np.sum(my_res == y_test)/len(my_res)
avgacc = totacc/10
avgcmat = totcmat/10
avgpre = totpre/10
avgrec = totrec/10
sk.metrics.confusion_matrix(y_test,my_res)
print(avgacc)
print(avgcmat)
print(avgpre)
print(avgrec)