# Import the required modules
%pylab inline
from theano import tensor as T
from theano import function
from theano import pp
from theano import Param
from theano import grad
from theano import shared
import numpy as np


class LogisticRegression:
    '''
    This class performs logistic regression.
    '''
    def __init__(self, alpha=0.001, iterations=10000, threshold=.5):
        '''
        1. alpha is the learning rate. The default value of alpha is 0.001.
        2. iterations is the number of times the weights will be updated.
           The default value is 10000.
        3. threshold is the cut-off used to convert a predicted probability
           to either a 0 or 1 class label.
        '''
        # Set the learning rate
        self.alpha = alpha
        # Set the number of iterations
        self.iterations = iterations
        # Set the threshold value
        self.threshold = threshold

        # Define the symbolic variables
        X, W = T.dmatrices('training-set', 'weights')
        y = T.matrix('target-labels')
        m, a = T.scalars('no-of-samples', 'learning-rate')

        # Define the hypothesis (sigmoid), the cross-entropy cost and the
        # gradient of the cost with respect to the weights
        h = 1 / (1 + T.exp(-T.dot(X, W)))
        cost_fn = -((T.dot(T.transpose(y), T.log(h)) +
                     T.dot(T.transpose(1 - y), T.log(1 - h))) / m).sum()
        grad_fn = T.dot(T.transpose(X), h - y) / m
        updated_wts = W - a * grad_fn

        # Compile the Theano functions
        self.cost = function([X, y, W, m], cost_fn)
        self.new_wts = function([X, y, W, m, a], updated_wts)
        self.pred = function([X, W], h)

    def fit(self, X, y):
        '''
        The fit member function is used to train the classifier.
        1. X is an array-like data matrix
        2. y is an array-like target vector
        '''
        # Add the bias feature to the data samples
        bias = np.ones((X.shape[0], 1))
        X = np.hstack((X, bias))
        # m is the number of data samples; cast the inputs to float64
        # since the compiled Theano functions expect float tensors
        m = float(X.shape[0])
        y = np.asarray(y, dtype=np.float64)
        # Initialize all weights to 0
        self.W = np.zeros((X.shape[1], 1), dtype=np.float64)
        # Iterate, tracking the cost and updating the weights
        for i in range(self.iterations):
            cst = self.cost(X, y, self.W, m)
            self.W = self.new_wts(X, y, self.W, m, self.alpha)
        # Set coef_ and intercept_; the bias weight is the last entry
        self.coef_ = self.W[:-1]
        self.intercept_ = self.W[-1]

    def predict(self, X):
        '''
        This function predicts the target label.
        1. X is an array-like data matrix whose target vector is predicted
        '''
        # Add the bias feature to the data samples
        bias = np.ones((X.shape[0], 1))
        X = np.hstack((X, bias))
        # Return the predicted target label
        return (self.pred(X, self.W) > self.threshold) * 1

    def predict_proba(self, X):
        '''
        This function gives the probability (confidence) of the predicted
        class label.
        1. X is an array-like data matrix whose target vector is predicted
        '''
        # Add the bias feature to the data samples
        bias = np.ones((X.shape[0], 1))
        X = np.hstack((X, bias))
        # self.pred gives the probability of class 1; flip it for samples
        # predicted as class 0, so each entry is the confidence of the
        # label returned by predict
        prob = self.pred(X, self.W)
        for index, value in enumerate(prob < self.threshold):
            if value:
                prob[index] = 1 - prob[index]
        return prob
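
# A quick numerical sanity check (an illustrative addition; the tiny
# arrays below are made up for this check and are not part of the
# classifier). The cross-entropy gradient is X^T (h - y) / m, so a
# single new_wts step with learning rate 1 from zero weights should
# subtract exactly that quantity.
_clf = LogisticRegression()
_X = np.array([[1.0, 2.0, 1.0],
               [3.0, 1.0, 1.0]])          # 2 samples, bias column included
_y = np.array([[0.0], [1.0]])
_W = np.zeros((3, 1))
_h = 1 / (1 + np.exp(-_X.dot(_W)))        # sigmoid computed by hand
_manual_grad = _X.T.dot(_h - _y) / 2.0
_theano_grad = _W - _clf.new_wts(_X, _y, _W, 2.0, 1.0)
assert np.allclose(_manual_grad, _theano_grad)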
# Prepare the train data: class 0 lies below the line y = x - 2 and
# class 1 lies above the line y = x + 2
x1 = [[x, y] for x in range(3, 8) for y in range(1, 6) if x - 3 > y - 1]
x2 = [[x, y] for x in range(1, 6) for y in range(3, 8) if y - 3 > x - 1]
X_train = np.array(x1 + x2)

# Prepare the train data labels
y_train = np.array([[0]] * len(x1) + [[1]] * len(x2))

# Prepare the test data
X_test = np.array([[randint(0, 8), randint(0, 8)] for x in range(25)])

# Fit and predict
clf = LogisticRegression(iterations=10000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_pred_prob = clf.predict_proba(X_test)

# NOTE :: This graphic will only work for 2-D features, although the
# classifier itself works for any number of dimensions
# Plot the decision boundary
# Training set in stars
# Testing set in circles
# Also show the probability of each prediction
plot(X_train[:, 0][y_train[:, 0] == 0], X_train[:, 1][y_train[:, 0] == 0], "*r")
plot(X_train[:, 0][y_train[:, 0] == 1], X_train[:, 1][y_train[:, 0] == 1], "*b")
plot(X_test[:, 0][y_pred[:, 0] == 0], X_test[:, 1][y_pred[:, 0] == 0], "or")
plot(X_test[:, 0][y_pred[:, 0] == 1], X_test[:, 1][y_pred[:, 0] == 1], "ob")
x_min, x_max, y_min, y_max = 0, 8, 0, 8
axis([x_min, x_max, y_min, y_max])
# The boundary is the line coef_[0]*x + coef_[1]*y + intercept_ = 0,
# i.e. y = -(coef_[0]*x + intercept_) / coef_[1]
y_min_val = -(clf.coef_[0][0] * x_min + clf.intercept_[0]) / clf.coef_[1][0]
y_max_val = -(clf.coef_[0][0] * x_max + clf.intercept_[0]) / clf.coef_[1][0]
plot([x_min, x_max], [y_min_val, y_max_val])
for index, (x, y) in enumerate(X_test):
    text(x, y, round(y_pred_prob[index][0], 2))
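
# A rough check of the fit (an illustrative addition): the training set
# above is linearly separable, so the training accuracy should be 1.0,
# or very close to it, once the weights have converged
train_acc = (clf.predict(X_train) == y_train).mean()
print('Training accuracy: %.2f' % train_acc)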