#!/usr/bin/env python
# coding: utf-8

# In[366]:

get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', '-a "Romell D.Z." -u -d -p numpy,pandas,matplotlib,sklearn')

# # 1. Supervised Learning

# In[367]:

from __future__ import division
import warnings
warnings.simplefilter('ignore')
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12, 6)
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
import numpy as np
import pandas as pd

# In[368]:

df_global = pd.read_csv('global_metrics.csv')
print('Columns in the DataFrame:')
print(df_global.columns)
df_global = df_global.iloc[:, 1:]  # drop the unnamed index column

# ## Variable descriptions of the global statistics

# In[369]:

df_global.info()

# ## Descriptive statistics

# In[370]:

df_global.describe().T.iloc[:, 1:]

# ## Descriptive analysis of life expectancy

# In[371]:

ax = df_global.boxplot('esperanza_vida', by='region', rot=5)
plt.title('Life expectancy by region'); plt.xlabel(''); plt.ylabel('')
plt.savefig('snapshot/global_hope_rate', bbox_inches='tight', dpi=100);

# In[372]:

df_global.boxplot('fertilidad', 'region', rot=5)
plt.title('Fertility by region'); plt.xlabel(''); plt.ylabel('')
plt.savefig('snapshot/global_fertility_rate', bbox_inches='tight', dpi=100);

# In[373]:

y = df_global['esperanza_vida'].values
X = df_global['fertilidad'].values
X.shape, y.shape

# In[374]:

X, y = X.reshape(-1, 1), y.reshape(-1, 1)
X.shape, y.shape

# In[375]:

# Plot the relation between the two variables
plt.scatter(X, y)
plt.title('Relation between fertility & life expectancy')
plt.xlabel('Fertility')
plt.ylabel('Life expectancy')

# In[376]:

from sklearn.linear_model import LinearRegression

reg = LinearRegression()
prediction_space = np.linspace(min(X), max(X)).reshape(-1, 1)
# Train on the full data set
reg.fit(X, y)
y_pred = reg.predict(prediction_space)
print('R^2 score:', reg.score(X, y))
print('Slope: %.4f & Intercept: %.4f' % (reg.coef_[0][0], reg.intercept_[0]))

plt.scatter(X, y, marker='*')
plt.plot(prediction_space, y_pred, marker='+', color='red', linewidth=3)
plt.legend(['%.4f + (%.4f) * F' % (reg.intercept_[0], reg.coef_[0][0])])
plt.margins(.05)
plt.title('Relation between fertility & life expectancy')
plt.xlabel('Fertility')
plt.ylabel('Life expectancy')
plt.savefig('snapshot/global_fertility_hopelife', bbox_inches='tight', dpi=100);

# In[377]:

X = df_global.drop(['fertilidad', 'region'], axis=1)
y = df_global['fertilidad']

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
print("R^2: %.4f" % reg.score(X_test, y_test))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE: %.4f" % rmse)

# In[378]:

from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(reg, X, y, cv=7)
print('Cross-validation scores using 7-fold:\n', cv_scores)
print("Average 7-fold CV score: %.4f" % np.mean(cv_scores))

# In[379]:

print("Average 10-fold CV score: %.4f" % np.mean(cross_val_score(reg, X, y, cv=10)))

# In[380]:

from sklearn.model_selection import ShuffleSplit

cv = ShuffleSplit(n_splits=10, test_size=0.3)
scores = cross_val_score(reg, X, y, cv=cv)
print('Cross-validation scores using 10-split ShuffleSplit:\n', scores)
print('R^2: %0.2f (+/- %.3f)' % (scores.mean(), scores.std()))
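# The cross-validation cells above report R^2; as a complementary error measure,
# a minimal sketch of a cross-validated RMSE using scikit-learn's
# 'neg_mean_squared_error' scorer (exact values depend on the fold assignment):

neg_mse = cross_val_score(reg, X, y, cv=10, scoring='neg_mean_squared_error')
print("Average 10-fold CV RMSE: %.4f" % np.mean(np.sqrt(-neg_mse)))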
# In[381]:

cv = ShuffleSplit(n_splits=5, test_size=0.25)
scores = cross_val_score(reg, X, y, cv=cv)
print('Cross-validation scores using 5-split ShuffleSplit:\n', scores)
print('R^2: %0.2f (+/- %.3f)' % (scores.mean(), scores.std()))

# In[382]:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(StandardScaler(), reg)
scores = cross_val_score(clf, X, y, cv=cv, scoring='r2')
print('Cross-validation scores using StandardScaler inside a pipeline:\n', scores)
print('R^2: %0.2f (+/- %.3f)' % (scores.mean(), scores.std()))

# In[383]:

# Life expectancy becomes the target: drop it (and the categorical region) from the features
X = df_global.drop(['esperanza_vida', 'region'], axis=1)
y = df_global['esperanza_vida'].values
df_columns = df_global.drop(['esperanza_vida', 'region'], axis=1).columns

# In[384]:

from sklearn.linear_model import Lasso

lasso4 = Lasso(alpha=0.4, normalize=True)
lasso4.fit(X, y)
print("Lasso regression coefficients with alpha=0.4:\n", lasso4.coef_)
lasso2 = Lasso(alpha=0.2, normalize=True)
lasso2.fit(X, y)
print("Lasso regression coefficients with alpha=0.2:\n", lasso2.coef_)

x_axis = range(len(X.columns))
# Plot the coefficients
plt.plot(x_axis, lasso4.coef_)
plt.plot(x_axis, lasso2.coef_)
plt.xticks(x_axis, X.columns.values, rotation=25)
plt.grid(True)
plt.margins(0.02)
plt.title('Coefficients using Lasso regression')
plt.savefig('snapshot/global_lasso_regression', bbox_inches='tight', dpi=100);
plt.show()

# In[385]:

def plotRidgeRegressionScores(alpha_space, cv_scores, cv_scores_std):
    # the scores arrive as plain lists, so convert before doing arithmetic on them
    cv_scores = np.asarray(cv_scores)
    cv_scores_std = np.asarray(cv_scores_std)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alpha_space, cv_scores)
    std_error = cv_scores_std / np.sqrt(10)
    ax.fill_between(alpha_space, cv_scores - std_error, cv_scores + std_error, alpha=0.2)
    ax.set_ylabel('CV Score +/- Std Error')
    ax.set_xlabel('Alpha')
    ax.axhline(np.max(cv_scores), linestyle='--', color='.5')
    ax.set_ylim([0, 0.5])
    ax.set_xlim([alpha_space[0], alpha_space[-1]])
    ax.set_xscale('log')
    plt.show()

# In[386]:

from sklearn.linear_model import Ridge

alpha_space = np.logspace(-3, 0, 50)
ridge_reg_scores = []
ridge_reg_scores_std = []
ridge = Ridge(normalize=True)
for alpha in alpha_space:
    ridge.alpha = alpha
    ridge_reg_cv_scores = cross_val_score(ridge, X, y, cv=5)
    ridge_reg_scores.append(np.mean(ridge_reg_cv_scores))
    ridge_reg_scores_std.append(np.std(ridge_reg_cv_scores))
plotRidgeRegressionScores(alpha_space, ridge_reg_scores, ridge_reg_scores_std)

# In[387]:

diabetes = pd.read_csv('diabetes.csv').iloc[:, 1:]
X = diabetes.drop(['diabetes'], axis=1)
y = diabetes['diabetes']
X.columns

# In[388]:

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print('K-Nearest Neighbors classifier score:', knn.score(X_test, y_test))
print('Classification report:\n', classification_report(y_test, y_pred,
      target_names=['No Diabetes', 'Has Diabetes']))

# In[389]:

# Repeat the split with stratification to preserve the class balance
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y)
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print('K-Nearest Neighbors classifier score:', knn.score(X_test, y_test))
print('Classification report:\n', classification_report(y_test, y_pred,
      target_names=['No Diabetes', 'Has Diabetes']))

# In[390]:

neighbors = np.arange(1, 15)
train = np.empty(len(neighbors))
test = np.empty(len(neighbors))
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train[i] = knn.score(X_train, y_train)
    test[i] = knn.score(X_test, y_test)

plt.title('k-Nearest Neighbors: Varying Number of Neighbors')
plt.plot(neighbors, train, label='Training Accuracy')
plt.plot(neighbors, test, label='Testing Accuracy')
plt.legend(loc='best')
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.savefig('snapshot/global_varying_knn_numbers', bbox_inches='tight', dpi=100);

# ### Best number of neighbors: k = 5
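# The choice of k can also be read off programmatically from the accuracy arrays just
# computed; a minimal sketch (the winning k depends on the random train/test split,
# so it may differ from 5 on another run):

best_k = neighbors[np.argmax(test)]
print('Best k on this test split: %d (accuracy %.4f)' % (best_k, test.max()))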
# In[391]:

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
y_pred_prob = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Plot ROC curve
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(['Random guess', 'ROC AUC score: %.4f' % roc_auc_score(y_test, y_pred_prob)])
plt.title('ROC Curve: Logistic Regression')
plt.savefig('snapshot/global_roc_logistic_regression', bbox_inches='tight', dpi=100);

# In[392]:

cv_auc = cross_val_score(logreg, X, y, cv=10, scoring='roc_auc')
print("Mean AUC over 10 folds: %.4f" % np.mean(cv_auc))

# In[393]:

from sklearn.model_selection import GridSearchCV

c_space = np.logspace(-5, 8, 15)
param_grid = {'C': c_space, 'penalty': ['l1', 'l2']}
logreg_cv = GridSearchCV(logreg, param_grid, cv=5)
logreg_cv.fit(X_train, y_train)
y_pred = logreg_cv.predict(X_test)
y_pred_prob = logreg_cv.predict_proba(X_test)[:, 1]
acc = logreg_cv.score(X_test, y_test)  # classifier .score() returns accuracy
mse = mean_squared_error(y_test, y_pred)
print("Logistic regression best parameters: %s" % logreg_cv.best_params_)
print("Best CV score: %.4f" % logreg_cv.best_score_)
print("Logistic regression test accuracy: {}".format(acc))
print("Logistic regression MSE: {}".format(mse))
print("AUC: {}".format(roc_auc_score(y_test, y_pred_prob)))

# In[394]:

from scipy.stats import randint
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

param_dist = {"max_depth": [3, 5, 7, None],
              "max_features": randint(1, 9),
              "min_samples_leaf": randint(1, 9),
              "criterion": ["gini", "entropy"]}
tree = DecisionTreeClassifier()
tree_cv = RandomizedSearchCV(tree, param_dist, cv=5)
tree_cv.fit(X_train, y_train)
y_pred = tree_cv.predict(X_test)
y_pred_prob = tree_cv.predict_proba(X_test)[:, 1]
acc = tree_cv.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
print("Decision tree best parameters: %s" % tree_cv.best_params_)
print("Best CV score: %.4f" % tree_cv.best_score_)
print("DecisionTreeClassifier test accuracy: {}".format(acc))
print("DecisionTreeClassifier MSE: {}".format(mse))
print("AUC: {}".format(roc_auc_score(y_test, y_pred_prob)))

# In[395]:

from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

l1_space = np.linspace(0, 1, 30)  # l1_ratio must lie in [0, 1]
param_grid = {'l1_ratio': l1_space}
elastic_net = ElasticNet()
gm_cv = GridSearchCV(elastic_net, param_grid, cv=5)
gm_cv.fit(X_train, y_train)
y_pred = gm_cv.predict(X_test)
r2 = gm_cv.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
print("Tuned ElasticNet best params: {}".format(gm_cv.best_params_))
print("Tuned ElasticNet R squared: {}".format(r2))
print("Tuned ElasticNet MSE: {}".format(mse))

# In[396]:

df_global_dummy = pd.get_dummies(df_global, drop_first=True)  # drops the first category, region_America
df_global_dummy.columns

# In[397]:

# When the region is America, every region_* dummy variable is zero:
df_global_dummy.iloc[2:3, -5:]
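# For contrast, a minimal sketch of the same one-hot encoding without drop_first,
# restricted to the 'region' column: it keeps the redundant region_America column
# that drop_first removes above.

pd.get_dummies(df_global[['region']]).head()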
# In[398]:

X = df_global_dummy.drop('fertilidad', axis=1)
y = df_global_dummy[['fertilidad']]

from sklearn.linear_model import Ridge

ridge = Ridge(alpha=0.5, normalize=True)
ridge_cv = cross_val_score(ridge, X, y, cv=5)
print(ridge_cv)
print(np.mean(ridge_cv))

# In[399]:

ridge.fit(X, y)
plt.plot(range(len(ridge.coef_[0])), ridge.coef_[0])
plt.xticks(range(len(ridge.coef_[0])), X.columns, rotation=35)
plt.ylabel('Coefficient Values')
plt.title('Coefficients using Ridge regression')
plt.savefig('snapshot/global_ridge_regression', bbox_inches='tight', dpi=100);

# In[400]:

X = diabetes.drop(['diabetes'], axis=1)
y = diabetes['diabetes']

# In[401]:

# Knock out 30 random cells to simulate missing values
for i in range(30):
    idx = np.random.choice(np.arange(len(X)), 1)
    idy = np.random.choice(np.arange(len(X.columns[:-1])), 1)
    X.iloc[idx, idy] = np.nan
X.info()

# In[402]:

X.isnull().sum()

# In[403]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)

# In[404]:

from sklearn.impute import SimpleImputer

imp = SimpleImputer(missing_values=np.nan, strategy='mean')
X_clean = pd.DataFrame(imp.fit_transform(X), columns=X.columns)

# In[405]:

print('Are there any null values left?')
X_clean.isnull().sum()

# In[406]:

from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

imp = SimpleImputer(missing_values=np.nan, strategy='mean')
svm = SVC(probability=True)
steps = [('imputer', imp), ('SVM', svm)]
pipeline = Pipeline(steps)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
y_pred_prob_svm = pipeline.predict_proba(X_test)[:, 1]
print('Classification report:\n', classification_report(y_test, y_pred))
print("ROC AUC: %.4f" % roc_auc_score(y_test, y_pred_prob_svm))

# In[407]:

steps = [('imputer', imp), ('knn', KNeighborsClassifier())]
pipeline = Pipeline(steps)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
y_pred_prob_knn = pipeline.predict_proba(X_test)[:, 1]
print('Classification report:\n', classification_report(y_test, y_pred))
print("ROC AUC: %.4f" % roc_auc_score(y_test, y_pred_prob_knn))

# In[408]:

steps = [('imputer', imp), ('logreg', LogisticRegression())]
pipeline = Pipeline(steps)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
y_pred_prob_lgr = pipeline.predict_proba(X_test)[:, 1]
print('Classification report:\n', classification_report(y_test, y_pred))
print("ROC AUC: %.4f" % roc_auc_score(y_test, y_pred_prob_lgr))

# In[409]:

steps = [('imputer', imp), ('tree', DecisionTreeClassifier(max_depth=5))]
pipeline = Pipeline(steps)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
y_pred_prob_dtc = pipeline.predict_proba(X_test)[:, 1]
print('Classification report:\n', classification_report(y_test, y_pred))
print("ROC AUC: %.4f" % roc_auc_score(y_test, y_pred_prob_dtc))

# In[410]:

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('imputer', imp),
                     ('scaler', StandardScaler()),
                     ('knn', KNeighborsClassifier(n_neighbors=5))])
knn_standard_scale = pipeline.fit(X_train, y_train)
y_pred = knn_standard_scale.predict(X_test)
y_pred_prob_knn_ss = knn_standard_scale.predict_proba(X_test)[:, 1]
print('Classification report:\n', classification_report(y_test, y_pred))
print('KNN with 5 neighbors, test accuracy: %.4f' % knn_standard_scale.score(X_test, y_test))
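# Before plotting the ROC curves, a small optional summary of the probability-based
# test AUCs collected above (a sketch; exact values depend on the random split and
# on the randomly injected NaNs):

auc_summary = pd.DataFrame({
    'model': ['SVM', 'KNN', 'Logistic Regression', 'Decision Tree', 'KNN + StandardScaler'],
    'test_auc': [roc_auc_score(y_test, p) for p in
                 (y_pred_prob_svm, y_pred_prob_knn, y_pred_prob_lgr,
                  y_pred_prob_dtc, y_pred_prob_knn_ss)],
}).sort_values('test_auc', ascending=False)
print(auc_summary)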
# In[411]:

from sklearn.metrics import auc

for label, y_pred_prob in zip(["SVM", "KNN", "LGR", "DTC", "KNN_SS"],
                              [y_pred_prob_svm, y_pred_prob_knn, y_pred_prob_lgr,
                               y_pred_prob_dtc, y_pred_prob_knn_ss]):
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    plt.plot(fpr, tpr, label=label + ", AUC: %.4f" % auc(fpr, tpr))

plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.title('Benchmarking ROC curves & AUCs')
plt.savefig('snapshot/global_benchmarking', bbox_inches='tight', dpi=100);

# # KNN Selected

# In[412]:

from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit

plt.figure()
plt.title("KNN-5 Learning Curve")
plt.xlabel("Training examples")
plt.ylabel("Score")
cv = ShuffleSplit(n_splits=100, test_size=0.2)
train_sizes, train_scores, test_scores = learning_curve(
    knn_standard_scale, X, y, cv=cv, n_jobs=3,
    train_sizes=np.linspace(.1, 1.0, 15))
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
plt.grid()
print('train_sizes:', train_sizes)
plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.1, color="r")
plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std, alpha=0.1, color="g")
plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
plt.legend(loc="lower right")
plt.savefig('snapshot/global_knn6_learning_curve', bbox_inches='tight', dpi=100);

# In[413]:

knn_standard_scale.get_params()

# In[414]:

from sklearn.model_selection import validation_curve

param_range = np.linspace(3, 14, 12, dtype=int)
train_scores, test_scores = validation_curve(
    knn_standard_scale, X, y, param_name="knn__n_neighbors",
    param_range=param_range, cv=cv, scoring="accuracy", n_jobs=3)
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

plt.title("KNN Validation Curve: varying n_neighbors")
plt.xlabel("n neighbors")
plt.ylabel("Score")
plt.ylim(.6, 1)
lw = 2
plt.plot(param_range, train_scores_mean, label="Training score", color="darkorange", lw=lw)
plt.fill_between(param_range, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2, color="darkorange", lw=lw)
plt.plot(param_range, test_scores_mean, label="Cross-validation score", color="navy", lw=lw)
plt.fill_between(param_range, test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std, alpha=0.2, color="navy", lw=lw)
print(train_scores_mean, train_scores_std)
plt.legend(loc="best")
plt.savefig('snapshot/global_knn6_validation_curve', bbox_inches='tight', dpi=100);

# In[415]:

pipeline = Pipeline([('imputer', imp),
                     ('scaler', StandardScaler()),
                     ('knn', KNeighborsClassifier())])
parameters = {'knn__n_neighbors': [3, 4, 5, 6, 7]}
cv = GridSearchCV(pipeline, param_grid=parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print('KNeighbors classifier - classification report:\n', classification_report(y_test, y_pred))
print("Best Parameters: %s" % cv.best_params_)
print("Best Accuracy: %.4f" % cv.best_score_)
print("Test Accuracy: %.4f" % cv.score(X_test, y_test))
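# Beyond the best estimator, GridSearchCV exposes every candidate it evaluated via
# cv_results_; a minimal sketch listing the mean CV accuracy per parameter setting:

for params, mean_score in zip(cv.cv_results_['params'], cv.cv_results_['mean_test_score']):
    print('%s -> mean CV accuracy: %.4f' % (params, mean_score))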
# In[416]:

pipeline = Pipeline([('imputer', imp),
                     ('scaler', StandardScaler()),
                     ('SVM', SVC())])
parameters = {'SVM__C': [1, 10, 100],
              'SVM__gamma': [0.1, 0.01, 0.001]}
cv = GridSearchCV(pipeline, param_grid=parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print('Support vector classification - classification report:\n', classification_report(y_test, y_pred))
print("Best Parameters: %s" % cv.best_params_)
print("Best Accuracy: %.4f" % cv.best_score_)
print("Test Accuracy: %.4f" % cv.score(X_test, y_test))

# In[417]:

pipeline = Pipeline([('imputer', imp),
                     ('scaler', StandardScaler()),
                     ('lgr', LogisticRegression())])
parameters = {'lgr__C': [1, 10, 100],
              'lgr__penalty': ['l1', 'l2']}
cv = GridSearchCV(pipeline, param_grid=parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print('Logistic regression - classification report:\n', classification_report(y_test, y_pred))
print("Best Parameters: %s" % cv.best_params_)
print("Best Accuracy: %.4f" % cv.best_score_)
print("Test Accuracy: %.4f" % cv.score(X_test, y_test))

# In[418]:

pipeline = Pipeline([('imputer', imp),
                     ('scaler', StandardScaler()),
                     ('tree', DecisionTreeClassifier())])
parameters = {"tree__max_depth": [3, 5, 7, None],
              "tree__max_features": randint(1, 9),
              "tree__min_samples_leaf": randint(1, 9),
              "tree__criterion": ["gini", "entropy"]}
cv = RandomizedSearchCV(pipeline, parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print('Decision tree classifier - classification report:\n', classification_report(y_test, y_pred))
print("Best Parameters: %s" % cv.best_params_)
print("Best Accuracy: %.4f" % cv.best_score_)
print("Test Accuracy: %.4f" % cv.score(X_test, y_test))

# In[419]:

X = df_global_dummy.drop(['esperanza_vida'], axis=1)
y = df_global_dummy['esperanza_vida']

# In[420]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# In[421]:

pipeline = Pipeline([('scaler', StandardScaler()),
                     ('elasticnet', ElasticNet())])
parameters = {"elasticnet__l1_ratio": np.linspace(0.1, 0.9, 9),
              "elasticnet__normalize": [True, False],
              "elasticnet__selection": ['cyclic', 'random']}
cv = GridSearchCV(pipeline, parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print("Best Parameters: %s" % cv.best_params_)
print("Best R^2: %.4f" % cv.best_score_)
print("Test R^2: %.4f" % cv.score(X_test, y_test))

# In[422]:

pipeline = Pipeline([('scaler', StandardScaler()),
                     ('lasso', Lasso())])
parameters = {"lasso__alpha": np.linspace(0.001, 0.5, 20),  # alpha must be non-negative
              "lasso__normalize": [True, False],
              "lasso__selection": ['cyclic', 'random']}
cv = GridSearchCV(pipeline, parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print("Best Parameters: %s" % cv.best_params_)
print("Best R^2: %.4f" % cv.best_score_)
print("Test R^2: %.4f" % cv.score(X_test, y_test))

# In[423]:

pipeline = Pipeline([('scaler', StandardScaler()),
                     ('ridge', Ridge())])
parameters = {"ridge__alpha": np.linspace(0.001, 1, 20),  # alpha must be non-negative
              "ridge__normalize": [True, False]}
cv = GridSearchCV(pipeline, parameters)
cv.fit(X_train, y_train)
y_pred = cv.predict(X_test)
print("Best Parameters: %s" % cv.best_params_)
print("Best R^2: {}".format(cv.best_score_))
print("Test R^2: %.4f" % cv.score(X_test, y_test))

# In[ ]:
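# A final optional check, reusing the mean_squared_error import from earlier: the test
# RMSE of the tuned Ridge pipeline alongside its R^2 (a sketch; values depend on the
# 60/40 split above):

rmse = np.sqrt(mean_squared_error(y_test, cv.predict(X_test)))
print("Test RMSE: %.4f" % rmse)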