#!/usr/bin/env python
# coding: utf-8

# In[1]:


import math
import time

import matplotlib
import numpy as np
import sklearn

# Library versions this analysis was run with. In a notebook the tuple is
# echoed as the cell output; as a plain script the expression has no effect.
(
    matplotlib.__version__,
    np.__version__,
    sklearn.__version__,
)


# In[2]:


import matplotlib.pyplot as plt

from sklearn import metrics

from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler


# In[3]:


# Build ONE clean dataset and split it ONCE, then add label noise to the
# training labels only.
#
# Why not call make_classification twice (noisy for train, clean for test)?
# Label flipping consumes random draws before the rows and feature columns
# are shuffled, so two calls that differ only in flip_y do not return the same
# rows in the same order even under the same seed. Splitting each result
# independently therefore leaks training rows into the test set and can
# misalign the feature columns between train and test.
np.random.seed(222)
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    weights=[0.3, 0.7],
    class_sep=0.7,
    flip_y=0.0)  # clean labels; the default value for flip_y is 0.01, or 1%
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Training-label noise, applied the same way flip_y does it: pick roughly 35%
# of the samples and assign each a class drawn uniformly at random (so about
# half of the picked samples keep their original label).
label_noise = 0.35
flip_mask = np.random.uniform(size=y_train.shape[0]) < label_noise
y_train[flip_mask] = np.random.randint(2, size=int(flip_mask.sum()))


# In[4]:


# Feature mean and positive-label fraction of the (noisy) training split.
X_train.mean(),y_train.mean()


# In[6]:


# Quick sanity check on the dataset currently bound to X, y: the overall
# feature mean and the fraction of samples labelled 1.
(X.mean(), y.mean())


# In[7]:


# Two-step model: min-max scale the features, then fit a support-vector
# classifier. The step names ('prep', 'clf') are the prefixes used by the
# parameter grids below, e.g. 'clf__C'.
pipeline = Pipeline(
    steps=[
        ('prep', MinMaxScaler()),
        ('clf', SVC()),
    ]
)


# ## linear kernel

# In[8]:


def plot_roc_grid(param_grid, estimator, X_fit, y_fit, X_eval, y_eval,
                  num_cols=3):
    """Fit *estimator* for every setting in *param_grid* and plot its ROC curve.

    Each parameter combination gets its own subplot (``num_cols`` per row)
    showing the ROC curve computed from ``decision_function`` scores on the
    evaluation data, the AUC in the legend, and the chance diagonal.
    The time spent fitting and scoring is printed before the figure is shown.

    Args:
        param_grid: Grid specification accepted by ``ParameterGrid``.
        estimator: Object providing ``set_params``, ``fit`` and
            ``decision_function``. It is reconfigured and refitted in place
            for every combination.
        X_fit: Features the estimator is fitted on.
        y_fit: Labels the estimator is fitted on.
        X_eval: Features the ROC curve is computed on.
        y_eval: True labels the ROC curve is computed against.
        num_cols: Number of subplots per row.

    Returns:
        The ``(fig, axes)`` pair from ``plt.subplots``; ``axes`` is always a
        2-D array, whatever the grid size.
    """
    wall_start = time.perf_counter()
    cpu_start = time.process_time()

    settings = list(ParameterGrid(param_grid))
    num_rows = max(1, math.ceil(len(settings) / num_cols))

    # squeeze=False keeps `axes` 2-D even when a single row is enough.
    # No plt.clf() beforehand: it would create a stray empty figure.
    fig, axes = plt.subplots(num_rows, num_cols, sharey=True, squeeze=False)
    fig.set_size_inches(num_cols * 5, num_rows * 5)
    flat_axes = axes.ravel()

    for ax, g in zip(flat_axes, settings):
        estimator.set_params(**g)
        estimator.fit(X_fit, y_fit)

        y_preds = estimator.decision_function(X_eval)

        # fpr means false-positive-rate
        # tpr means true-positive-rate
        fpr, tpr, _ = metrics.roc_curve(y_eval, y_preds)
        auc_score = metrics.auc(fpr, tpr)

        # don't print the whole name or it won't fit; gamma and kernel are
        # constant within a grid, so they are left out of the title
        ax.set_title(str([r"{}:{}".format(
            k.split('__')[1:], v) for k, v in g.items()
            if "gamma" not in k and "kernel" not in k]), fontsize=15)
        ax.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc_score))
        ax.legend(loc='lower right')

        # it's helpful to add a diagonal to indicate where chance
        # scores lie (i.e. just flipping a coin)
        ax.plot([0, 1], [0, 1], 'r--')

        ax.set_xlim([-0.1, 1.1])
        ax.set_ylim([-0.1, 1.1])
        ax.set_ylabel('True Positive Rate')
        ax.set_xlabel('False Positive Rate')

    # Hide the leftover cells of the subplot grid instead of showing
    # empty frames.
    for ax in flat_axes[len(settings):]:
        ax.set_visible(False)

    fig.tight_layout()

    # Report timing before plt.show(), which may block until the window
    # is closed and would otherwise inflate the wall time.
    print('CPU time: {:.2f} s, wall time: {:.2f} s'.format(
        time.process_time() - cpu_start,
        time.perf_counter() - wall_start))

    plt.show()
    return fig, axes


# Regularisation strengths tried for every kernel.
C_VALUES = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

param_grid1 = [
    {
        'clf__kernel': ['linear'],
        'clf__C': C_VALUES,
        'clf__gamma': ['auto']
    }
]


# In[22]:


fig, axes = plot_roc_grid(
    param_grid1, pipeline, X_train, y_train, X_test, y_test)


# ## polynomial kernel, degree=2

# In[10]:


param_grid2 = [
    {
        'clf__kernel': ['poly'],
        'clf__C': C_VALUES,
        'clf__degree': [2],
        'clf__gamma': ['auto']
    }
]


# In[23]:


fig, axes = plot_roc_grid(
    param_grid2, pipeline, X_train, y_train, X_test, y_test)


# ## polynomial kernel, degree=3

# In[12]:


param_grid3 = [
    {
        'clf__kernel': ['poly'],
        'clf__C': C_VALUES,
        'clf__degree': [3],
        'clf__gamma': ['auto']
    }
]


# In[24]:


fig, axes = plot_roc_grid(
    param_grid3, pipeline, X_train, y_train, X_test, y_test)


# ## polynomial kernel, degree=4

# In[25]:


param_grid4 = [
    {
        'clf__kernel': ['poly'],
        'clf__C': C_VALUES,
        'clf__degree': [4],
        'clf__gamma': ['auto']
    }
]


# In[26]:


fig, axes = plot_roc_grid(
    param_grid4, pipeline, X_train, y_train, X_test, y_test)


# ## polynomial kernel, degree=5

# In[27]:


param_grid5 = [
    {
        'clf__kernel': ['poly'],
        'clf__C': C_VALUES,
        'clf__degree': [5],
        'clf__gamma': ['auto']
    }
]


# In[28]:


fig, axes = plot_roc_grid(
    param_grid5, pipeline, X_train, y_train, X_test, y_test)


# ## rbf kernel

# In[18]:


param_grid6 = [
    {
        'clf__kernel': ['rbf'],
        'clf__C': C_VALUES,
        'clf__gamma': ['auto']
    }
]


# In[29]:


fig, axes = plot_roc_grid(
    param_grid6, pipeline, X_train, y_train, X_test, y_test)