# Model-validation walkthrough (flattened notebook cells).
#
# Scores estimators first on the data they were trained on and then on a
# held-out split, for the digits classification task and the Boston housing
# regression task.

import numpy as np
from matplotlib import pyplot as plt

from sklearn import metrics
# NOTE: load_boston was removed in scikit-learn 1.2; the Boston sections below
# need an older scikit-learn release to run.
from sklearn.datasets import load_boston
from sklearn.datasets import load_digits
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split

# Equivalent of the ``%matplotlib inline`` notebook magic, written as plain
# Python so the file still parses and runs outside IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # Not running under IPython: there is no inline backend to enable.
    pass

# ---------------------------------------------------------------------------
# Digits: evaluate a 1-nearest-neighbour classifier on its own training data
# ---------------------------------------------------------------------------

# Get the data
digits = load_digits()
X = digits.data
y = digits.target

# Instantiate and train the classifier
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(X, y)

# Check the results using metrics.
# confusion_matrix takes (y_true, y_pred), matching the held-out call below.
y_pred = clf.predict(X)
print(metrics.confusion_matrix(y, y_pred))

# ---------------------------------------------------------------------------
# Boston housing: decision-tree predictions on the training data
# ---------------------------------------------------------------------------

data = load_boston()
clf = DecisionTreeRegressor().fit(data.data, data.target)

predicted = clf.predict(data.data)
expected = data.target

plt.scatter(expected, predicted)
plt.plot([0, 50], [0, 50], '--k')  # dashed black y = x reference line
plt.axis('tight')
plt.xlabel('True price ($1000s)')
plt.ylabel('Predicted price ($1000s)')

# ---------------------------------------------------------------------------
# Digits: hold out 25% of the samples and evaluate on them
# ---------------------------------------------------------------------------

X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

print("%r, %r, %r" % (X.shape, X_train.shape, X_test.shape))

clf = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

# The digits target is multiclass, so an explicit ``average`` is required
# (the default 'binary' is rejected for multiclass targets). The scores are
# printed because a bare expression is only echoed by a notebook, never by a
# script.
print(metrics.f1_score(y_test, y_pred, average="weighted"))
print(metrics.f1_score(y_train, clf.predict(X_train), average="weighted"))

# ---------------------------------------------------------------------------
# Boston housing: training vs. validation score of a decision tree
# ---------------------------------------------------------------------------

data = load_boston()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

print("%r, %r, %r" % (X.shape, X_train.shape, X_test.shape))

est = DecisionTreeRegressor().fit(X_train, y_train)

validation_score = metrics.explained_variance_score(
    y_test, est.predict(X_test))
print("validation: %r" % validation_score)

training_score = metrics.explained_variance_score(
    y_train, est.predict(X_train))
print("training: %r" % training_score)

# Refit on the same split with gradient boosting; `est` now refers to this
# model.
est = GradientBoostingRegressor().fit(X_train, y_train)
# Score the current estimator `est` on the held-out split and on the data it
# was fitted on, using explained variance for both so the two numbers are
# directly comparable.
validation_score = metrics.explained_variance_score(
    y_test, est.predict(X_test))
print("validation: %r" % validation_score)

training_score = metrics.explained_variance_score(
    y_train, est.predict(X_train))
print("training: %r" % training_score)

# Classifiers made available for the exercise that follows.
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

# IPython-only magic, kept as a comment so this file remains valid Python.
# Uncomment it in a notebook cell to pull in the exercise solution:
# %load solutions/05C_validation_exercise.py