#!/usr/bin/env python
# coding: utf-8

# ### Machine Learning – A Gentle Introduction

# In[1]:


# imports
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn import preprocessing, pipeline
import numpy as np
import scipy as sp
import scipy.stats  # imported explicitly so sp.stats is available below
import matplotlib.pyplot as plt
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')
sns.set()


# In[2]:


# Load the Iris dataset
iris_data = load_iris()
X = iris_data['data']
y = iris_data['target']
print(iris_data['feature_names'])
print(iris_data['target_names'])


# In[3]:


# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)
print(X_train[:2])
print("X_train shape", X_train.shape)
print("X_test shape", X_test.shape)

# Standardize the features: fit the scaler on the training data only,
# then apply the same transformation to both splits
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train[:2])


# **SGDClassifier**
#
# SGD stands for Stochastic Gradient Descent, a popular numerical procedure for
# finding a local minimum of a function (here, the loss function, which measures
# how far each instance is from the decision boundary). The algorithm learns the
# coefficients of the separating hyperplane by minimizing this loss.

# In[4]:


# instantiate
sgd = SGDClassifier()
# fit
sgd.fit(X_train, y_train)
# coefficients
print("coefficients:", sgd.coef_)
# intercept
print("intercept:", sgd.intercept_)

# predict a single instance (scale it with the same fitted scaler first)
y_pred = sgd.predict(scaler.transform([[4.9, 3.1, 1.5, 0.1]]))
print(y_pred)

# predict on X_test
y_pred = sgd.predict(X_test)

# check accuracy on both splits
print("Model accuracy on train data:", accuracy_score(y_train, sgd.predict(X_train)))
print("Model accuracy on test data:", accuracy_score(y_test, y_pred))


# In[5]:


# plot the first two (standardized) features of the training data;
# in the Iris dataset, class 1 is versicolor and class 2 is virginica
plt.figure(figsize=(8, 6))
plt.scatter(X_train[:, 0][y_train == 0], X_train[:, 1][y_train == 0], color='red', label='setosa')
plt.scatter(X_train[:, 0][y_train == 1], X_train[:, 1][y_train == 1], color='blue', label='versicolor')
plt.scatter(X_train[:, 0][y_train == 2], X_train[:, 1][y_train == 2], color='green', label='virginica')
plt.legend(loc='best')


# **Classification Report**
#
# With m the total number of instances:
#
# Accuracy  = (TP + TN) / m
# Precision = TP / (TP + FP)
# Recall    = TP / (TP + FN)
# F1-score  = 2 * Precision * Recall / (Precision + Recall)

# In[6]:


print(classification_report(y_pred=y_pred, y_true=y_test))


# In[7]:


confusion_matrix(y_pred=y_pred, y_true=y_test)


# Using a pipeline mechanism to build and evaluate our model

# In[8]:


# create a composite estimator: a pipeline of standardization followed by the linear model
clf = pipeline.Pipeline([
    ('scaler', preprocessing.StandardScaler()),
    ('linear_model', SGDClassifier())
])

# create a k-fold cross-validation iterator with k=5 folds
cv = KFold(n_splits=5, shuffle=True, random_state=33)

# by default the score used is the one returned by the estimator's score method (accuracy)
scores = cross_val_score(clf, X, y, cv=cv)
print(scores)


# In[9]:


# mean accuracy and standard error of the mean
print(np.mean(scores), sp.stats.sem(scores))


# In[ ]:
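# **Supplementary: the linear decision function by hand**
#
# A minimal sketch, not part of the original walkthrough: for a linear
# one-vs-all classifier, the predicted class is the argmax of the per-class
# hyperplane scores X @ coef_.T + intercept_. This checks that identity against
# sgd.predict, assuming `sgd` and the standardized `X_test` from the cells
# above are still in scope.

# In[ ]:


# per-class scores: one hyperplane per class (one-vs-all)
scores_manual = X_test @ sgd.coef_.T + sgd.intercept_
# the predicted class is the one with the highest score
pred_manual = np.argmax(scores_manual, axis=1)
print("matches sgd.predict:", np.array_equal(pred_manual, sgd.predict(X_test)))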
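# **Supplementary: the report metrics from the confusion matrix**
#
# A minimal sketch, added to connect the formulas above to the
# classification_report output: it recomputes per-class precision, recall, and
# F1 directly from the confusion matrix. Assumes `y_test` and `y_pred` from the
# cells above are still in scope.

# In[ ]:


cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
# row i = true class i, column j = predicted class j
tp = np.diag(cm)            # correctly predicted instances of each class
fp = cm.sum(axis=0) - tp    # predicted as the class, but actually another class
fn = cm.sum(axis=1) - tp    # actually the class, but predicted as another one
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
for name, p, r, f in zip(iris_data['target_names'], precision, recall, f1):
    print(f"{name}: precision={p:.2f} recall={r:.2f} f1={f:.2f}")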
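# **Supplementary: tuning the pipeline**
#
# A minimal sketch, assuming scikit-learn's GridSearchCV: it reuses the `clf`
# pipeline and the `cv` iterator from above to tune the `alpha` regularization
# strength of the SGDClassifier step. The grid values are illustrative choices,
# not recommendations.

# In[ ]:


from sklearn.model_selection import GridSearchCV

# parameters of pipeline steps are addressed as <step_name>__<param_name>
param_grid = {'linear_model__alpha': [1e-5, 1e-4, 1e-3, 1e-2]}
grid = GridSearchCV(clf, param_grid=param_grid, cv=cv)
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)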