# Write out some toy data: split the digits dataset into ten pickled batches
# under data/ so the sections below can read them back one batch at a time.
# The data/ directory must already exist; open() does not create it.
from sklearn.datasets import load_digits
try:
    import cPickle  # Python 2
except ImportError:
    # Python 3 has no cPickle module; pickle exposes the same dump/load API,
    # so bind it to the name the rest of this script uses.
    import pickle as cPickle

digits = load_digits()
X, y = digits.data, digits.target
for i in range(10):
    # Batch i holds every tenth sample, starting at offset i.
    # Protocol -1 selects the highest (binary) pickle protocol, so the file
    # is opened in "wb"; the with-block closes the handle after each dump.
    with open("data/batch_%02d.pickle" % i, "wb") as batch_file:
        cPickle.dump((X[i::10], y[i::10]), batch_file, -1)
# Out-of-core training: feed the pickled batches to a linear classifier one
# at a time via partial_fit, then evaluate on the held-out last batch.
from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier()
for i in range(9):
    # Batches 0-8 are used for training; batch 9 is kept for the test below.
    # Pickle data is binary, so read it in "rb" and close the file right away.
    with open("data/batch_%02d.pickle" % i, "rb") as batch_file:
        X_batch, y_batch = cPickle.load(batch_file)
    # classes lists all ten digit labels on every call, since a single batch
    # is not guaranteed to contain every label.
    sgd.partial_fit(X_batch, y_batch, classes=range(10))

with open("data/batch_09.pickle", "rb") as test_file:
    X_test, y_test = cPickle.load(test_file)
# Accuracy on the held-out batch. NOTE(review): the value is only displayed
# when this runs as the last expression of a notebook cell or in a REPL.
sgd.score(X_test, y_test)
# Same out-of-core loop as above, but each batch is first mapped through an
# approximate RBF kernel feature map before being given to the classifier.
from sklearn.kernel_approximation import RBFSampler

sgd = SGDClassifier()
kernel_approximation = RBFSampler(gamma=.001, n_components=400)
for i in range(9):
    # Pickle data is binary, so read it in "rb" and close the file right away.
    with open("data/batch_%02d.pickle" % i, "rb") as batch_file:
        X_batch, y_batch = cPickle.load(batch_file)
    if i == 0:
        # Fit the sampler once, on the first batch only, so that every later
        # batch (and the test batch) is transformed with the same feature map.
        kernel_approximation.fit(X_batch)
    X_transformed = kernel_approximation.transform(X_batch)
    # classes lists all ten digit labels on every call, since a single batch
    # is not guaranteed to contain every label.
    sgd.partial_fit(X_transformed, y_batch, classes=range(10))

with open("data/batch_09.pickle", "rb") as test_file:
    X_test, y_test = cPickle.load(test_file)
# Accuracy on the held-out batch, using the same transform as in training.
# NOTE(review): the value is only displayed when this runs as the last
# expression of a notebook cell or in a REPL.
sgd.score(kernel_approximation.transform(X_test), y_test)
# Exercise: compare the speed and accuracy of LogisticRegression, LinearSVC
# and SGDClassifier on the bank campaign and adult datasets.
# Also try LogisticRegression(solver='sag').