!date

from pykalman.classifier import GenerativeBayes
import pandas as pd
import numpy as np
rnorm = np.random.normal
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.qda import QDA
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.mixture import GMM
from sklearn.cross_validation import train_test_split
%matplotlib inline
import matplotlib.pylab as plt

classifiers = [
    DecisionTreeClassifier(),
    KNeighborsClassifier(3),
    LogisticRegression(),
    SVC(kernel="rbf"),
    AdaBoostClassifier(),
    GenerativeBayes(GMM(n_components=1, covariance_type='full',
                        init_params='wc', n_iter=20)),
    GenerativeBayes(GMM(n_components=2, covariance_type='full',
                        init_params='wc', n_iter=20)),
    QDA(),
    RandomForestClassifier()]

clf_names = [
    'Decision Tree',
    'K Neighbors',
    'Logistic Regression',
    'SVC (Gaussian)',
    'Ada Boost',
    'GMM (1 component)',
    'GMM (2 component)',
    'QDA',
    'Random Forest']

def plot_results(classifiers, df):
    plt.figure(figsize=(14, 14))
    X = df[['x', 'y']]
    Y = df['label']
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.4)

    n_classes = len(Y.unique())
    plot_colors = "brym"
    plot_step = 0.02

    # Mesh grid covering the data, with a 1-unit margin on each side
    x_min, x_max = X.ix[:, 0].min() - 1, X.ix[:, 0].max() + 1
    y_min, y_max = X.ix[:, 1].min() - 1, X.ix[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))

    for j, clf in enumerate(classifiers):
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        ax = plt.subplot(4, 3, j + 1)

        # Decision surface: predict every point of the mesh grid
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)

        # Plot the training points
        for i, color in zip(range(n_classes), plot_colors):
            plt.scatter(X[Y == i].x, X[Y == i].y, c=color, label=i,
                        cmap=plt.cm.Paired)

        # Test-set accuracy in the lower-right corner of each panel
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        plt.title(clf_names[j])
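GenerativeBayes is imported from a pykalman fork rather than from scikit-learn itself, so for readers without that package, here is a minimal sketch of what it is assumed to do: fit one copy of the supplied density model per class and classify with Bayes' rule. The class name GenerativeBayesSketch is hypothetical, and the sketch is written against the current GaussianMixture-style score_samples API (per-sample log-likelihoods) rather than the older GMM one used above.

# Hypothetical sketch of a generative Bayes classifier; not the pykalman implementation.
from sklearn.base import BaseEstimator, ClassifierMixin, clone

class GenerativeBayesSketch(BaseEstimator, ClassifierMixin):
    def __init__(self, density_estimator):
        self.density_estimator = density_estimator  # e.g. a GaussianMixture instance

    def fit(self, X, y):
        X, y = np.asarray(X), np.asarray(y)
        self.classes_ = np.unique(y)
        # Class priors from training frequencies
        self.priors_ = np.array([np.mean(y == c) for c in self.classes_])
        # One density model per class, fit on that class's points only
        self.models_ = [clone(self.density_estimator).fit(X[y == c])
                        for c in self.classes_]
        return self

    def predict_proba(self, X):
        X = np.asarray(X)
        # log p(x | class) + log p(class), normalized across classes
        log_like = np.array([m.score_samples(X) for m in self.models_]).T
        log_post = log_like + np.log(self.priors_)
        log_post -= log_post.max(axis=1, keepdims=True)
        post = np.exp(log_post)
        return post / post.sum(axis=1, keepdims=True)

    def predict(self, X):
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]

Under those assumptions, a drop-in replacement in the classifiers list above would look like GenerativeBayesSketch(GaussianMixture(n_components=2)).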
# Simple Linearly Separated
N = 50
p1 = pd.DataFrame(np.hstack((rnorm(loc=2.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=2.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p1['label'] = 0
p2 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p2['label'] = 1
df = pd.concat([p1, p2])
plot_results(classifiers, df)

# 3 Class Linearly Separated
N = 50
p1 = pd.DataFrame(np.hstack((rnorm(loc=3.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=3.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p1['label'] = 0
p2 = pd.DataFrame(np.hstack((rnorm(loc=2.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=2.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p2['label'] = 1
p3 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p3['label'] = 2
df = pd.concat([p1, p2, p3])
plot_results(classifiers, df)

# XOR pattern (simple)
N = 50
p1 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p1['label'] = 0
p2 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p2['label'] = 1
p3 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=-1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p3['label'] = 0
p4 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=0.5, size=(N, 1)),
                             rnorm(loc=-1.0, scale=0.5, size=(N, 1)))),
                  columns=['x', 'y'])
p4['label'] = 1
df = pd.concat([p1, p2, p3, p4])
plot_results(classifiers, df)

# XOR pattern (complex)
N = 50
p1 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p1['label'] = 0
p2 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p2['label'] = 1
p3 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=-1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p3['label'] = 0
p4 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=-1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p4['label'] = 1
df = pd.concat([p1, p2, p3, p4])
plot_results(classifiers, df)

# 4 Class (complex)
N = 100
p1 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p1['label'] = 0
p2 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p2['label'] = 1
p3 = pd.DataFrame(np.hstack((rnorm(loc=-1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=-1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p3['label'] = 2
p4 = pd.DataFrame(np.hstack((rnorm(loc=1.0, scale=1.0, size=(N, 1)),
                             rnorm(loc=-1.0, scale=1.0, size=(N, 1)))),
                  columns=['x', 'y'])
p4['label'] = 3
df = pd.concat([p1, p2, p3, p4])
plot_results(classifiers, df)
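A note on versions: sklearn.qda, sklearn.cross_validation, sklearn.mixture.GMM, and the DataFrame .ix indexer come from the scikit-learn and pandas releases this notebook was written against. On current releases the equivalents below should work, though the GMM-backed classifiers would also need porting to the GaussianMixture API (different init_params/max_iter arguments).

# Current scikit-learn equivalents of the deprecated imports used above
from sklearn.model_selection import train_test_split                            # was sklearn.cross_validation
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA  # was sklearn.qda
from sklearn.mixture import GaussianMixture                                     # was sklearn.mixture.GMM
# and inside plot_results, X.ix[:, 0] / X.ix[:, 1] become X.iloc[:, 0] / X.iloc[:, 1]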