"""Hyper-parameter search for an SVM classifier on the digits dataset.

The script runs three experiments on one train/test split:

1. an exhaustive grid search over ``C`` and ``gamma`` for a bare ``SVC``;
2. the same grid search with a ``StandardScaler`` in front of the ``SVC``;
3. a randomized search drawing ``C`` and ``gamma`` from an exponential
   distribution.

Cross-validated scores are plotted for each experiment.
"""

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import expon
from sklearn.datasets import load_digits
from sklearn.model_selection import (
    GridSearchCV,
    RandomizedSearchCV,
    cross_val_score,
    train_test_split,
)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# ``%matplotlib inline`` is IPython-only syntax and breaks a plain Python run.
# Request the inline backend only when an IPython shell is actually present.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass


def _plot_score_grid(mean_scores, c_values, gamma_values):
    """Draw mean cross-validation scores as a C-by-gamma heat map.

    Parameters
    ----------
    mean_scores : sequence of float
        One score per grid candidate, in the order the grid search evaluated
        them. The search enumerates parameter names in sorted order with the
        last name varying fastest, so ``C`` is the slow (row) axis and
        ``gamma`` the fast (column) axis.
    c_values, gamma_values : sequence of float
        The values searched for each parameter; used for the grid shape and
        the tick labels.

    Returns
    -------
    numpy.ndarray
        The scores reshaped to ``(len(c_values), len(gamma_values))``.
    """
    grid = np.asarray(mean_scores).reshape(len(c_values), len(gamma_values))
    plt.matshow(grid)
    plt.xlabel('gamma')
    plt.ylabel('C')
    plt.colorbar()
    plt.xticks(np.arange(len(gamma_values)), gamma_values)
    plt.yticks(np.arange(len(c_values)), c_values)
    return grid


# --- 1. Grid search over a bare SVC -----------------------------------------
param_grid = {
    'C': 10. ** np.arange(-3, 3),
    'gamma': 10. ** np.arange(-3, 3),
}
print(param_grid)

grid_search = GridSearchCV(SVC(), param_grid, verbose=3)

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target)
grid_search.fit(X_train, y_train)

# We extract just the mean validation score of every candidate.
scores = _plot_score_grid(
    grid_search.cv_results_['mean_test_score'],
    param_grid['C'],
    param_grid['gamma'],
)

print(grid_search.best_params_)
# The fitted search object predicts with the best estimator found.
grid_search.predict(X_test)
print(grid_search.score(X_test, y_test))

# --- 2. Scaling + SVC in a pipeline -----------------------------------------
scaler = StandardScaler()
scaler.fit(X_train)
# Per-feature mean and standard deviation of the scaled training data.
print(scaler.transform(X_train).mean(axis=0))
print(scaler.transform(X_train).std(axis=0))

pipeline = Pipeline([("scaler", scaler), ("svm", SVC())])
pipeline.fit(X_train, y_train)
pipeline.predict(X_train)

print(cross_val_score(pipeline, X_train, y_train))

# Pipeline step parameters are addressed as ``<step name>__<parameter>``.
param_grid_pipeline = {
    'svm__C': 10. ** np.arange(-3, 3),
    'svm__gamma': 10. ** np.arange(-3, 3),
}

grid_pipeline = GridSearchCV(pipeline, param_grid=param_grid_pipeline,
                             verbose=3)
grid_pipeline.fit(X_train, y_train)

# Label the axes from the grid that was actually searched here.
scores = _plot_score_grid(
    grid_pipeline.cv_results_['mean_test_score'],
    param_grid_pipeline['svm__C'],
    param_grid_pipeline['svm__gamma'],
)

print(grid_pipeline.score(X_test, y_test))

# --- 3. Randomized search ---------------------------------------------------
# Histogram of the distribution the parameters are sampled from. A fresh
# figure keeps it from being drawn on top of the previous heat map.
plt.figure()
plt.hist(expon.rvs(size=1000))

params = {'C': expon(), 'gamma': expon()}
rs = RandomizedSearchCV(SVC(), param_distributions=params, n_iter=50,
                        verbose=3)
rs.fit(X_train, y_train)

print(rs.best_params_)
print(rs.best_score_)

# One (score, C, gamma) triple per sampled candidate, split into columns.
scores, Cs, gammas = zip(*[
    (mean_score, candidate['C'], candidate['gamma'])
    for mean_score, candidate in zip(rs.cv_results_['mean_test_score'],
                                     rs.cv_results_['params'])
])

plt.figure()
plt.scatter(Cs, gammas, s=40, c=scores)
plt.xlabel("C")
plt.ylabel("gamma")

# No-op under the inline notebook backend; needed to display the figures
# when this runs as a plain script.
plt.show()