# Scikit-learn basics demo: linear regression, KNN, PCA, KMeans, and
# polynomial over-/under-fitting.
#
# NOTE(review): this file was originally an IPython notebook dump.
# "%pylab inline" is an IPython magic, not Python syntax — kept below as a
# comment so the file parses as a plain script.
# %pylab inline

import numpy as np
import matplotlib.pyplot as pl  # "pylab" is deprecated; pyplot is the supported API

from sklearn.linear_model import LinearRegression

# --- Linear regression on a tiny toy problem --------------------------------
# NOTE(review): LinearRegression(normalize=True) was deprecated in
# scikit-learn 1.0 and removed in 1.2; standardize inputs explicitly
# (e.g. sklearn.preprocessing.StandardScaler) if normalization is needed.
model = LinearRegression()
print(model)

x = np.array([0, 1, 2])
y = np.array([0, 1, 2])
_ = pl.plot(x, y, marker='o')

# The input data for sklearn is 2-D: (samples == 3, features == 1)
X = x[:, np.newaxis]
model.fit(X, y)
print(model.coef_)

# --- K-nearest-neighbours classification on the iris dataset ----------------
from sklearn import neighbors, datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target
knn = neighbors.KNeighborsClassifier(n_neighbors=1)
knn.fit(X, y)

# What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal?
print(iris.target_names[knn.predict([[3, 5, 4, 2]])])

# A plot of the sepal space and the prediction of the KNN
from helpers import plot_iris_knn
plot_iris_knn()

# --- Linear regression on noisy synthetic data ------------------------------
# Create some simple data
np.random.seed(0)
X = np.random.random(size=(20, 1))
y = 3 * X.squeeze() + 2 + np.random.normal(size=20)

# Fit a linear regression to it
model = LinearRegression(fit_intercept=True)
model.fit(X, y)
# coef_ is a length-1 array; index it so %-formatting gets a scalar
# (formatting an array with %.5f is deprecated in modern NumPy).
print("Model coefficient: %.5f, and intercept: %.5f"
      % (model.coef_[0], model.intercept_))

# Plot the data and the model prediction
X_test = np.linspace(0, 1, 100)[:, np.newaxis]
y_test = model.predict(X_test)
pl.plot(X.squeeze(), y, 'o')
pl.plot(X_test.squeeze(), y_test)

# --- PCA: project iris onto its two leading principal components ------------
from sklearn.decomposition import PCA

X, y = iris.data, iris.target
pca = PCA(n_components=2)
pca.fit(X)
X_reduced = pca.transform(X)
print("Reduced dataset shape:", X_reduced.shape)
pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y)

print("Meaning of the 2 components:")
for component in pca.components_:
    print(" + ".join("%.3f x %s" % (value, name)
                     for value, name in zip(component, iris.feature_names)))

# --- K-means clustering in the reduced space --------------------------------
from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=3, random_state=0)  # Fixing the RNG in kmeans
k_means.fit(X_reduced)
y_pred = k_means.predict(X_reduced)
pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_pred)

# --- Under-/over-fitting: 4th vs 9th order polynomial fits ------------------
rng = np.random.RandomState(0)
x = 2 * rng.rand(100) - 1


def f(t):
    """Ground-truth 9th-order polynomial used to generate the data."""
    return 1.2 * t ** 2 + .1 * t ** 3 - .4 * t ** 5 - .5 * t ** 9


y = f(x) + .4 * rng.normal(size=100)

pl.figure()
pl.scatter(x, y, s=4)

x_test = np.linspace(-1, 1, 100)

pl.figure()
pl.scatter(x, y, s=4)

# Design matrices of monomials 1, x, ..., x^(order-1)
X = np.array([x ** i for i in range(5)]).T
X_test = np.array([x_test ** i for i in range(5)]).T
order4 = LinearRegression()
order4.fit(X, y)
pl.plot(x_test, order4.predict(X_test), label='4th order')

X = np.array([x ** i for i in range(10)]).T
X_test = np.array([x_test ** i for i in range(10)]).T
order9 = LinearRegression()
order9.fit(X, y)
pl.plot(x_test, order9.predict(X_test), label='9th order')

pl.legend(loc='best')
pl.axis('tight')
pl.title('Fitting a 4th and a 9th order polynomial')

pl.figure()
pl.scatter(x, y, s=4)
pl.plot(x_test, f(x_test), label="truth")
pl.axis('tight')
pl.title('Ground truth (9th order polynomial)')