import theanoml
# Re-import theanoml so in-session edits to the package are picked up
# (Python 2 builtin `reload`; in Python 3 this lives in importlib).
reload(theanoml)
<module 'theanoml' from 'theanoml/__init__.pyc'>
import cPickle
import numpy as np
X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))
y = y - 1
classes = np.unique(y)
print X.shape, y.shape, classes
(1000, 1875) (1000,) [0 1 2 3 4 5 6 7 8]
## TEST Contractive AutoEncoder
# --- Contractive autoencoder: smoke test on the full dataset ---
reload(theanoml.autoencoder)  # pick up in-session edits to the module
# n_epochs=5 keeps the smoke test fast; other hyperparameters are defaults.
ac = theanoml.autoencoder.ContractiveAutoEncoder(n_epochs=5)
ac.fit(X)  # unsupervised training on the raw features
# Output below shows (1000, 500) — presumably n_hidden defaults to 500.
print ac.transform(X).shape
training epoch 0, recall cost -849.060751, jacobian norm 13.992968 training epoch 1, recall cost -2802.754321, jacobian norm 8.055183 training epoch 2, recall cost -4695.691037, jacobian norm 7.194211 training epoch 3, recall cost -6594.667194, jacobian norm 7.036029 training epoch 4, recall cost -8497.628880, jacobian norm 7.056526 (1000, 500)
## TEST the usage of auto encoder for classification
# NOTE(review): sklearn.cross_validation was deprecated in favor of
# sklearn.model_selection in later scikit-learn releases — confirm the
# installed version before porting.
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
# Hold out 20% for validation; no random_state, so the split differs run-to-run.
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
# Learn a 30-dimensional encoding from the training features only, then
# project the validation features through the same fitted encoder.
ac = theanoml.autoencoder.ContractiveAutoEncoder(n_hidden=30, n_epochs=20)
train_feats = ac.fit_transform(train_X)
validation_feats = ac.transform(validation_X)
training epoch 0, recall cost 453.759859, jacobian norm 4.880746 training epoch 1, recall cost 17.325294, jacobian norm 3.223397 training epoch 2, recall cost -67.488043, jacobian norm 2.688757 training epoch 3, recall cost -125.443729, jacobian norm 2.494923 training epoch 4, recall cost -176.106676, jacobian norm 2.418065 training epoch 5, recall cost -223.721742, jacobian norm 2.400184 training epoch 6, recall cost -269.771063, jacobian norm 2.415523 training epoch 7, recall cost -314.896781, jacobian norm 2.443972 training epoch 8, recall cost -359.440429, jacobian norm 2.482995 training epoch 9, recall cost -403.649780, jacobian norm 2.558611 training epoch 10, recall cost -448.440946, jacobian norm 2.665677 training epoch 11, recall cost -496.511160, jacobian norm 2.693180 training epoch 12, recall cost -548.328873, jacobian norm 2.634594 training epoch 13, recall cost -603.231024, jacobian norm 2.638994 training epoch 14, recall cost -661.238554, jacobian norm 2.588580 training epoch 15, recall cost -721.428754, jacobian norm 2.518980 training epoch 16, recall cost -783.781057, jacobian norm 2.535105 training epoch 17, recall cost -847.916318, jacobian norm 2.404655 training epoch 18, recall cost -913.003163, jacobian norm 2.371111 training epoch 19, recall cost -978.504146, jacobian norm 2.369204
# Train a linear classifier on the encoded features and report mean accuracy
# on both splits. With 9 classes, chance level is ~0.111; the scores printed
# below (~0.12 train / 0.09 validation) indicate this 30-dim encoding is not
# useful for classification as-is.
sgd = SGDClassifier()
sgd.fit(train_feats, train_y)
print sgd.score(train_feats, train_y)
print sgd.score(validation_feats, validation_y)
0.11875 0.09
## TEST Denoising AutoEncoder
# --- Denoising autoencoder: smoke test on the full dataset ---
reload(theanoml.autoencoder)  # pick up in-session edits to the module
da = theanoml.autoencoder.DenoisingAutoEncoder()  # all-default hyperparameters
da.fit(X)  # unsupervised training
# Output below shows (1000, 500) — same 500-unit hidden layer as the CAE run.
print da.transform(X).shape
training epoch 0, recall cost -799.749661 training epoch 1, recall cost -2749.752624 training epoch 2, recall cost -4658.635129 training epoch 3, recall cost -6579.628540 training epoch 4, recall cost -8506.663734 training epoch 5, recall cost -10438.382075 training epoch 6, recall cost -12365.957260 training epoch 7, recall cost -14292.152982 training epoch 8, recall cost -16226.490023 training epoch 9, recall cost -18152.938796 training epoch 10, recall cost -20078.178948 training epoch 11, recall cost -22015.874000 training epoch 12, recall cost -23934.141165 training epoch 13, recall cost -25865.363523 training epoch 14, recall cost -27787.282287 training epoch 15, recall cost -29725.064009 training epoch 16, recall cost -31659.581391 training epoch 17, recall cost -33561.392486 training epoch 18, recall cost -35497.663851 training epoch 19, recall cost -37422.808957 (1000, 500)
## TEST the usage of denoising auto encoder for classification
reload(theanoml.autoencoder)  # pick up in-session edits to the module
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
# Fresh 80/20 split (unseeded, so it differs from the CAE experiment's split).
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
# corruption_level=0.1 masks/perturbs 10% of inputs during training
# (exact corruption scheme is defined in theanoml.autoencoder — not visible here).
da = theanoml.autoencoder.DenoisingAutoEncoder(corruption_level=0.1)
train_feats = da.fit_transform(train_X)
validation_feats = da.transform(validation_X)
# Linear classifier on the learned features; scores printed below
# (~0.086 / 0.085) are at or below chance for 9 classes (~0.111).
sgd = SGDClassifier()
sgd.fit(train_feats, train_y)
print sgd.score(train_feats, train_y)
print sgd.score(validation_feats, validation_y)
training epoch 0, recall cost -609.777560 training epoch 1, recall cost -2195.633734 training epoch 2, recall cost -3720.939261 training epoch 3, recall cost -5256.150622 training epoch 4, recall cost -6800.977186 training epoch 5, recall cost -8352.417585 training epoch 6, recall cost -9909.995321 training epoch 7, recall cost -11465.122129 training epoch 8, recall cost -13014.628828 training epoch 9, recall cost -14579.630394 training epoch 10, recall cost -16126.183992 training epoch 11, recall cost -17668.635624 training epoch 12, recall cost -19238.311686 training epoch 13, recall cost -20767.353782 training epoch 14, recall cost -22341.993979 training epoch 15, recall cost -23895.572684 training epoch 16, recall cost -25444.877052 training epoch 17, recall cost -27005.653110 training epoch 18, recall cost -28509.056846 training epoch 19, recall cost -30077.161623 0.08625 0.085