%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sklearn

# Visualizes how a classifier would classify each point in a grid
# http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
from matplotlib.colors import ListedColormap

def decision_boundary(clf, X, Y):
    h = .02  # step size in the mesh

    # Create color maps
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.show()

def plot_test_train(clf, Xtrain, Ytrain, Xtest):
    plt.prism()  # this sets a nice color map
    plt.scatter(Xtest[:, 0], Xtest[:, 1], c=clf.predict(Xtest), marker='^')
    plt.scatter(Xtrain[:, 0], Xtrain[:, 1], c=Ytrain)

from sklearn.datasets import load_digits
digits = load_digits()
print("images shape: %s" % str(digits.images.shape))
print("targets shape: %s" % str(digits.target.shape))
digit_X = digits.images.reshape(-1, 64)  # Reshape 8x8 images to length-64 vectors
digit_Y = digits.target                  # Get labels
plt.matshow(digits.images[0], cmap=plt.cm.Greys);

from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data.shape)
IX = iris.data    # Get features
IY = iris.target  # Get labels
print("X shape: {}".format(IX.shape))
print("Example features:\n {}".format(IX[:5]))
print("Labels:\n {}".format(IY[:70]))

from sklearn.datasets import make_blobs
BX, BY = make_blobs(cluster_std=1.6, random_state=9)
plt.scatter(BX[:, 0], BX[:, 1], c=BY)
plt.show()

from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(BX, BY)
decision_boundary(knn, BX, BY)

knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(BX, BY)
decision_boundary(knn, BX, BY)

X, Y = sklearn.utils.shuffle(IX, IY)
for k in [1, 3, 5]:
    knn = KNeighborsClassifier(n_neighbors=k)
    for n in [10, 50, 100]:
        knn.fit(X[:n], Y[:n])
        print("{} {}: {}".format(k, n, knn.score(X[n:], Y[n:])))
    print()

from sklearn.model_selection import train_test_split

# Hold out 1/3 of the data for testing
X_train, X_test, Y_train, Y_test = train_test_split(IX, IY, test_size=0.33)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)
knn.score(X_test, Y_test)

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

def score(clf, X, Y, folds=2, verbose=False, metric=accuracy_score):
    predictions = np.zeros(len(Y))
    for i, (train, test) in enumerate(KFold(n_splits=folds, shuffle=True).split(X)):
        clf.fit(X[train], Y[train])
        predictions[test] = clf.predict(X[test])
        if verbose:
            print("Fold {}: {}".format(i + 1, accuracy_score(Y[test], predictions[test])))
    if metric:
        return metric(Y, predictions)
    return Y, predictions

for k in range(1, 10, 2):
    acc = score(KNeighborsClassifier(n_neighbors=k), IX, IY, folds=30)
    print("{}: {}".format(k, acc))

from sklearn import tree
dt = tree.DecisionTreeClassifier()
dt.fit(BX, BY)
decision_boundary(dt, BX, BY)

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=10)
rf.fit(BX, BY)
decision_boundary(rf, BX, BY)
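# A quick side-by-side sketch: the `score` helper above works with any estimator,
# so we can compare the classifiers seen so far on the iris data. The fold count
# and forest size here are arbitrary illustrative choices.
for name, clf in [('knn-3', KNeighborsClassifier(n_neighbors=3)),
                  ('tree', tree.DecisionTreeClassifier()),
                  ('forest', RandomForestClassifier(n_estimators=10))]:
    print("{}: {}".format(name, score(clf, IX, IY, folds=10)))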
from sklearn import svm
clf = svm.SVC(kernel='linear')
clf.fit(BX, BY)
decision_boundary(clf, BX, BY)

from sklearn.datasets import make_circles
CX, CY = make_circles(factor=0.5, noise=0.2, random_state=1)
clf = svm.SVC(kernel='linear')
clf.fit(CX, CY)
decision_boundary(clf, CX, CY)  # Illustrate a linear SVM on the circles dataset

clf = svm.SVC(kernel='rbf')  # rbf is the default kernel type
clf.fit(CX, CY)
decision_boundary(clf, CX, CY)  # Illustrate an RBF SVM on the circles dataset

clf = svm.SVC()
clf.fit(BX, BY)
decision_boundary(clf, BX, BY)

from sklearn import metrics
clf = svm.SVC()
y, pred = score(clf, IX, IY, metric=None)
print(metrics.classification_report(y, pred))
print(metrics.confusion_matrix(y, pred))

clf = svm.SVC()
y, pred = score(clf, digit_X, digit_Y, folds=10, metric=None)
print(metrics.classification_report(y, pred))
print(metrics.confusion_matrix(y, pred))

clf = svm.SVC(kernel='linear')  # This is a case where a different kernel helps
y, pred = score(clf, digit_X, digit_Y, folds=10, metric=None)
print(metrics.classification_report(y, pred))
print(metrics.confusion_matrix(y, pred))

from sklearn.datasets import load_boston
data = load_boston()
HX = data['data']
HY = data['target']
print(data.DESCR[:1200])

from sklearn import linear_model as lm
y, pred = score(lm.LinearRegression(), HX, HY, folds=10, metric=None)
print(metrics.mean_squared_error(y, pred))
plt.hist(HY)
plt.show()

# Example discovered cluster centers
from sklearn import cluster
km = cluster.KMeans(n_clusters=3)
Y_hat = km.fit(BX).labels_
plt.scatter(BX[:, 0], BX[:, 1], c=BY, alpha=0.4)
mu = km.cluster_centers_
plt.scatter(mu[:, 0], mu[:, 1], s=100, c=np.unique(Y_hat))
plt.show()

from sklearn.decomposition import PCA
pca = PCA(n_components=2, svd_solver='randomized')
proj = pca.fit_transform(digits.data)
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()
plt.show()

pca = PCA(n_components=2, svd_solver='randomized')
proj = pca.fit_transform(IX)
plt.scatter(proj[:, 0], proj[:, 1], c=IY)
plt.colorbar()
plt.show()

from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=5, n_components=2)
proj = iso.fit_transform(digits.data)
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()
plt.show()

# from sklearn.manifold import MDS
# mds = MDS()
# proj = mds.fit_transform(digit_X)
# plt.scatter(proj[:, 0], proj[:, 1], c=digit_Y)
# plt.colorbar()
# plt.show()

from sklearn.model_selection import GridSearchCV
param_grid = [
    {'C': [1, 10], 'kernel': ['linear']},
    {'C': [1, 10], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
gs = GridSearchCV(svm.SVC(), param_grid)
gs.fit(digit_X, digit_Y)  # Let's try it on the digits data
print(gs.best_params_)
print(gs.cv_results_['mean_test_score'])

gs.fit(IX, IY)  # Different settings work better for the iris dataset
print(gs.best_params_)
print(gs.cv_results_['mean_test_score'])

from sklearn.pipeline import Pipeline
pca = PCA(n_components=16, svd_solver='randomized')
clf = svm.SVC(kernel='linear')
pipeline = Pipeline(steps=[('PCA', pca), ('SVM', clf)])
# fit/predict work the same way as for other classifiers:
#   pipeline.fit(X[train], Y[train])
#   pipeline.predict(X[test])
score(pipeline, digit_X, digit_Y, folds=10)
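# Usage sketch: the pipeline behaves like any other estimator, so the earlier
# train/test split workflow applies to it unchanged. The 1/4 test fraction is
# an arbitrary illustrative choice.
Xtr, Xte, Ytr, Yte = train_test_split(digit_X, digit_Y, test_size=0.25)
pipeline.fit(Xtr, Ytr)
print(pipeline.score(Xte, Yte))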