import theanoml
# Re-import theanoml so in-session edits to the package are picked up
# (Python 2 builtin `reload`; in Python 3 this lives in importlib).
reload(theanoml)
<module 'theanoml' from 'theanoml/__init__.pyc'>
import cPickle
import numpy as np
X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))
y = y - 1
classes = np.unique(y)
print X.shape, y.shape, classes
(1000, 1875) (1000,) [0 1 2 3 4 5 6 7 8]
## TEST Contractive AutoEncoder
# --- Contractive autoencoder: smoke test on the full dataset ---
reload(theanoml.autoencoder)  # pick up in-session edits to the module
# n_epochs=5 keeps the smoke test fast; other hyperparameters are defaults.
ac = theanoml.autoencoder.ContractiveAutoEncoder(n_epochs=5)
ac.fit(X)  # unsupervised training on the raw features
# Output below shows (1000, 500) — presumably n_hidden defaults to 500.
print ac.transform(X).shape
training epoch 0, recall cost -849.060751, jacobian norm 13.992968 training epoch 1, recall cost -2802.754321, jacobian norm 8.055183 training epoch 2, recall cost -4695.691037, jacobian norm 7.194211 training epoch 3, recall cost -6594.667194, jacobian norm 7.036029 training epoch 4, recall cost -8497.628880, jacobian norm 7.056526 (1000, 500)
## TEST the usage of auto encoder for classification
# NOTE(review): sklearn.cross_validation was deprecated in favor of
# sklearn.model_selection in later scikit-learn releases — confirm the
# installed version before porting.
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
# Hold out 20% for validation; no random_state, so the split differs run-to-run.
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
# Learn a 30-dimensional encoding from the training features only, then
# project the validation features through the same fitted encoder.
ac = theanoml.autoencoder.ContractiveAutoEncoder(n_hidden=30, n_epochs=20)
train_feats = ac.fit_transform(train_X)
validation_feats = ac.transform(validation_X)
training epoch 0, recall cost 453.759859, jacobian norm 4.880746 training epoch 1, recall cost 17.325294, jacobian norm 3.223397 training epoch 2, recall cost -67.488043, jacobian norm 2.688757 training epoch 3, recall cost -125.443729, jacobian norm 2.494923 training epoch 4, recall cost -176.106676, jacobian norm 2.418065 training epoch 5, recall cost -223.721742, jacobian norm 2.400184 training epoch 6, recall cost -269.771063, jacobian norm 2.415523 training epoch 7, recall cost -314.896781, jacobian norm 2.443972 training epoch 8, recall cost -359.440429, jacobian norm 2.482995 training epoch 9, recall cost -403.649780, jacobian norm 2.558611 training epoch 10, recall cost -448.440946, jacobian norm 2.665677 training epoch 11, recall cost -496.511160, jacobian norm 2.693180 training epoch 12, recall cost -548.328873, jacobian norm 2.634594 training epoch 13, recall cost -603.231024, jacobian norm 2.638994 training epoch 14, recall cost -661.238554, jacobian norm 2.588580 training epoch 15, recall cost -721.428754, jacobian norm 2.518980 training epoch 16, recall cost -783.781057, jacobian norm 2.535105 training epoch 17, recall cost -847.916318, jacobian norm 2.404655 training epoch 18, recall cost -913.003163, jacobian norm 2.371111 training epoch 19, recall cost -978.504146, jacobian norm 2.369204
# Train a linear classifier on the encoded features and report mean accuracy
# on both splits. With 9 classes, chance level is ~0.111; the scores printed
# below (~0.12 train / 0.09 validation) indicate this 30-dim encoding is not
# useful for classification as-is.
sgd = SGDClassifier()
sgd.fit(train_feats, train_y)
print sgd.score(train_feats, train_y)
print sgd.score(validation_feats, validation_y)
0.11875 0.09
## TEST Denoising AutoEncoder
# --- Denoising autoencoder: smoke test on the full dataset ---
reload(theanoml.autoencoder)  # pick up in-session edits to the module
da = theanoml.autoencoder.DenoisingAutoEncoder()  # all-default hyperparameters
da.fit(X)  # unsupervised training
# Output below shows (1000, 500) — same 500-unit hidden layer as the CAE run.
print da.transform(X).shape
training epoch 0, recall cost -799.749661 training epoch 1, recall cost -2749.752624 training epoch 2, recall cost -4658.635129 training epoch 3, recall cost -6579.628540 training epoch 4, recall cost -8506.663734 training epoch 5, recall cost -10438.382075 training epoch 6, recall cost -12365.957260 training epoch 7, recall cost -14292.152982 training epoch 8, recall cost -16226.490023 training epoch 9, recall cost -18152.938796 training epoch 10, recall cost -20078.178948 training epoch 11, recall cost -22015.874000 training epoch 12, recall cost -23934.141165 training epoch 13, recall cost -25865.363523 training epoch 14, recall cost -27787.282287 training epoch 15, recall cost -29725.064009 training epoch 16, recall cost -31659.581391 training epoch 17, recall cost -33561.392486 training epoch 18, recall cost -35497.663851 training epoch 19, recall cost -37422.808957 (1000, 500)
## TEST the usage of denoising auto encoder for classification
reload(theanoml.autoencoder)  # pick up in-session edits to the module
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
# Fresh 80/20 split (unseeded, so it differs from the CAE experiment's split).
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
# corruption_level=0.1 masks/perturbs 10% of inputs during training
# (exact corruption scheme is defined in theanoml.autoencoder — not visible here).
da = theanoml.autoencoder.DenoisingAutoEncoder(corruption_level=0.1)
train_feats = da.fit_transform(train_X)
validation_feats = da.transform(validation_X)
# Linear classifier on the learned features; scores printed below
# (~0.086 / 0.085) are at or below chance for 9 classes (~0.111).
sgd = SGDClassifier()
sgd.fit(train_feats, train_y)
print sgd.score(train_feats, train_y)
print sgd.score(validation_feats, validation_y)
training epoch 0, recall cost -609.777560 training epoch 1, recall cost -2195.633734 training epoch 2, recall cost -3720.939261 training epoch 3, recall cost -5256.150622 training epoch 4, recall cost -6800.977186 training epoch 5, recall cost -8352.417585 training epoch 6, recall cost -9909.995321 training epoch 7, recall cost -11465.122129 training epoch 8, recall cost -13014.628828 training epoch 9, recall cost -14579.630394 training epoch 10, recall cost -16126.183992 training epoch 11, recall cost -17668.635624 training epoch 12, recall cost -19238.311686 training epoch 13, recall cost -20767.353782 training epoch 14, recall cost -22341.993979 training epoch 15, recall cost -23895.572684 training epoch 16, recall cost -25444.877052 training epoch 17, recall cost -27005.653110 training epoch 18, recall cost -28509.056846 training epoch 19, recall cost -30077.161623 0.08625 0.085