# Notebook export: a first look at scikit-learn using the iris data set and a
# simple linear regression.  Notebook cells are separated by "# %%" markers;
# bare expressions and trailing semicolons are kept because they control what
# a notebook displays.

# %% start the inline backend for plotting
# `%matplotlib inline` is IPython-only syntax.  Invoking the magic through the
# shell object keeps this file valid Python; get_ipython() returns None when
# the code is not running under IPython, in which case nothing is done.
from IPython import get_ipython

_shell = get_ipython()
if _shell is not None:
    _shell.run_line_magic("matplotlib", "inline")

# %% Import the example plot from the figures directory
from fig_code import plot_sgd_separator
plot_sgd_separator()

# %%
# Uncomment the %load command to load the contents of the file
# %load fig_code/sgd_separator.py

# %%
from fig_code import plot_linear_regression
plot_linear_regression()

# %% Show one picture per iris species
# `IPython.display` is the public import location for Image and display.
from IPython.display import Image, display
display(Image(filename='images/iris_setosa.jpg'))
print("Iris Setosa\n")

display(Image(filename='images/iris_versicolor.jpg'))
print("Iris Versicolor\n")

display(Image(filename='images/iris_virginica.jpg'))
print("Iris Virginica")

# %% Load the iris data
from sklearn.datasets import load_iris
iris = load_iris()

# %%
iris.keys()

# %%
n_samples, n_features = iris.data.shape
print((n_samples, n_features))
print(iris.data[0])

# %%
print(iris.data.shape)
print(iris.target.shape)

# %%
print(iris.target)

# %%
print(iris.target_names)

# %% Scatter two of the features, coloured by target class
import numpy as np
import matplotlib.pyplot as plt

x_index = 0
y_index = 1

# this formatter will label the colorbar with the correct target names
formatter = plt.FuncFormatter(lambda i, *args: iris.target_names[int(i)])

# plt.get_cmap(name, lut) replaces plt.cm.get_cmap, which current Matplotlib
# releases no longer provide; the second argument asks for 3 discrete colours.
plt.scatter(iris.data[:, x_index], iris.data[:, y_index],
            c=iris.target, cmap=plt.get_cmap('RdYlBu', 3))
plt.colorbar(ticks=[0, 1, 2], format=formatter)
# Colour limits are centred on the integer labels 0, 1, 2.
plt.clim(-0.5, 2.5)
plt.xlabel(iris.feature_names[x_index])
plt.ylabel(iris.feature_names[y_index]);

# %%
from sklearn import datasets

# %%
# Type datasets.fetch_ in IPython to see all possibilities

# %%
# datasets.load_

# %%
# NOTE: this rebinds the name `datasets` (previously sklearn.datasets).
from astroML import datasets

# %%
# Use tab completion to explore datasets
# datasets.fetch_

# %% The estimator object
from sklearn.linear_model import LinearRegression

# %%
# Hyperparameters are passed to the constructor and are readable as
# attributes afterwards.  The `normalize` option is no longer accepted by
# LinearRegression, so `fit_intercept` is used to illustrate the same point.
model = LinearRegression(fit_intercept=True)
print(model.fit_intercept)

# %%
print(model)

# %%
x = np.arange(10)
y = 2 * x + 1

# %%
print(x)
print(y)

# %%
plt.plot(x, y, 'o');

# %%
# The input data for sklearn is 2D: (samples == 10 x features == 1)
X = x[:, np.newaxis]
print(X)
print(y)

# %%
# fit the model on our data
model.fit(X, y)

# %%
# underscore at the end indicates a fit parameter
print(model.coef_)
print(model.intercept_)

# %%
# The fitted model no longer exposes `residues_`; compute the residual sum of
# squares from the predictions instead.
np.sum((y - model.predict(X)) ** 2)
# Notebook export (continued): supervised and unsupervised estimators on the
# iris data, model validation, and a classification example on the digits
# data.  Notebook cells are separated by "# %%" markers; bare expressions and
# trailing semicolons are kept because they control what a notebook displays.

# %% Classification with k nearest neighbours
import matplotlib.pyplot as plt  # used by the plotting cells below
from sklearn import neighbors, datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

# create the model
knn = neighbors.KNeighborsClassifier(n_neighbors=5)

# fit the model
knn.fit(X, y)

# What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal?
# call the "predict" method:
result = knn.predict([[3, 5, 4, 2],])

print(iris.target_names[result])

# %%
knn.predict_proba([[3, 5, 4, 2],])

# %%
from fig_code import plot_iris_knn
plot_iris_knn()

# %% Classification with a support vector classifier
from sklearn.svm import SVC

# %%
model = SVC()
model.fit(X, y)
result = model.predict([[3, 5, 4, 2],])
print(iris.target_names[result])

# %% Regression with a random forest
# Create some simple data
import numpy as np
np.random.seed(0)
X = np.random.random(size=(20, 1))
y = 3 * X.squeeze() + 2 + np.random.randn(20)

# %%
# Fit a Random Forest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(X, y)

# Plot the data and the model prediction
X_test = np.linspace(0, 1, 100)[:, np.newaxis]
y_test = model.predict(X_test)

plt.plot(X.squeeze(), y, 'o')
plt.plot(X_test.squeeze(), y_test);

# %% Dimensionality reduction with PCA
# X and y were overwritten by the regression example; restore the iris data.
X, y = iris.data, iris.target

from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X)
X_reduced = pca.transform(X)
print("Reduced dataset shape:", X_reduced.shape)

import pylab as pl
pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y,
           cmap='RdYlBu')

print("Meaning of the 2 components:")
for component in pca.components_:
    print(" + ".join("%.3f x %s" % (value, name)
                     for value, name in zip(component,
                                            iris.feature_names)))

# %% Clustering with K-means
from sklearn.cluster import KMeans
k_means = KMeans(n_clusters=3, random_state=0)  # Fixing the RNG in kmeans
k_means.fit(X)
y_pred = k_means.predict(X)

pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_pred,
           cmap='RdYlBu');

# %% Model validation
from sklearn.neighbors import KNeighborsClassifier
X, y = iris.data, iris.target
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(X, y)
# Predictions are made on the same samples the model was fitted on.
y_pred = clf.predict(X)
print(np.all(y == y_pred))

# %%
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y, y_pred))

# %%
# train_test_split is provided by sklearn.model_selection; the former
# sklearn.cross_validation module no longer exists.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)
print(confusion_matrix(ytest, ypred))

# %%
from IPython.display import Image
Image("http://scikit-learn.org/dev/_static/ml_map.png")

# %% The digits data
from sklearn import datasets
digits = datasets.load_digits()
digits.images.shape

# %%
# Show the first 100 images in a 10 x 10 grid, each labelled with its target.
fig, axes = plt.subplots(10, 10, figsize=(8, 8))
fig.subplots_adjust(hspace=0.1, wspace=0.1)

for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary')
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')
    ax.set_xticks([])
    ax.set_yticks([])

# %%
# The images themselves
print(digits.images.shape)
print(digits.images[0])

# %%
# The data for use in our algorithms
print(digits.data.shape)
print(digits.data[0])

# %%
# The target label
print(digits.target)

# %% Project the digits to two dimensions with Isomap
from sklearn.manifold import Isomap

# %%
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(digits.data)

# %%
data_projected.shape

# %%
# plt.get_cmap(name, lut) replaces plt.cm.get_cmap, which current Matplotlib
# releases no longer provide; the second argument asks for 10 discrete colours.
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolor='none', alpha=0.5,
            cmap=plt.get_cmap('nipy_spectral', 10));
plt.colorbar(label='digit label', ticks=range(10))
# Colour limits are centred on the integer labels 0..9.
plt.clim(-0.5, 9.5)

# %% Classification on the digits
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target,
                                                random_state=2)
print(Xtrain.shape, Xtest.shape)

# %%
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(penalty='l2')
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)

# %%
from sklearn.metrics import accuracy_score
accuracy_score(ytest, ypred)

# %%
from sklearn.metrics import confusion_matrix
print(confusion_matrix(ytest, ypred))

# %%
# The log compresses the range of counts for display.  Any zero entry in the
# confusion matrix becomes -inf here (NumPy reports a divide-by-zero
# RuntimeWarning in that case).
plt.imshow(np.log(confusion_matrix(ytest, ypred)),
           cmap='Blues', interpolation='nearest')
plt.ylabel('true')
plt.xlabel('predicted');

# %%
# Show the first 100 test images with the predicted label, drawn in green
# when it matches the true label and in red otherwise.
fig, axes = plt.subplots(10, 10, figsize=(8, 8))
fig.subplots_adjust(hspace=0.1, wspace=0.1)

for i, ax in enumerate(axes.flat):
    ax.imshow(Xtest[i].reshape(8, 8), cmap='binary')
    ax.text(0.05, 0.05, str(ypred[i]),
            transform=ax.transAxes,
            color='green' if (ytest[i] == ypred[i]) else 'red')
    ax.set_xticks([])
    ax.set_yticks([])