*Test Input and Output Binding in Theano*

## Patterns for writing theano formulas
import theano
import theano.tensor as T
import numpy as np

class Computation(object):
    def __init__(self, x = None, y = None):
        ## avoid `x or T.matrix('x')`: truth-testing a theano variable is fragile
        self.x = x if x is not None else T.matrix('x')
        self.y = y if y is not None else T.matrix('y')
    ## for partial_fit mode: rebind the symbolic endpoints before
    ## building the error expression
    def bind_input(self, x):
        self.x = x
    def bind_output(self, y):
        self.y = y
    def error(self):
        return T.mean((self.y - self.x) ** 2)
data_x = np.random.random((1000, 10))
data_y = data_x + np.random.random((1000, 10)) * 0.01

model = Computation()
## cast each slice to floatX so the givens substitution type-checks
error1 = theano.function(inputs = [],
    outputs = model.error(),
    givens = {
        model.x: data_x[:10, :10].astype(theano.config.floatX),
        model.y: data_y[:10, :10].astype(theano.config.floatX),
    })
error2 = theano.function(inputs = [],
    outputs = model.error(),
    givens = {
        model.x: data_x[:15, :10].astype(theano.config.floatX),
        model.y: data_y[:15, :10].astype(theano.config.floatX),
    })
print error1(), error2()

3.43315307605e-05 3.45271127376e-05
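The givens trick above bakes each data slice into its own compiled function, so every new batch means another compilation. The bind_input/bind_output methods support the partial_fit style instead: bind the formula to shared variables once, compile once, and swap the data in place. A minimal usage sketch (my own, assuming the data fits in shared variables):

## bind the formula to shared variables, compile once, rebind data in place
shared_x = theano.shared(data_x[:10].astype(theano.config.floatX))
shared_y = theano.shared(data_y[:10].astype(theano.config.floatX))
model2 = Computation()
model2.bind_input(shared_x)
model2.bind_output(shared_y)
error3 = theano.function(inputs = [], outputs = model2.error())
print error3()
shared_x.set_value(data_x[:15].astype(theano.config.floatX))
shared_y.set_value(data_y[:15].astype(theano.config.floatX))
print error3()  ## same compiled function, new data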
*Contractive Auto Encoder*
import theano
import theano.tensor as T
import numpy as np
import time
class ContractiveAutoEncoder(object):
    """
    Contractive Auto-Encoder.
    References:
    - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive
      Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11
    - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, P. Vincent:
      Learning Invariant Features through Local Space Contraction.
      Technical Report 1360, Universite de Montreal
    - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
      Training of Deep Networks, Advances in Neural Information Processing
      Systems 19, 2007
    """
    def __init__(self, rng, n_visible, n_hidden, batch_size = 1,
                 X = None, W = None, bhid = None, bvis = None):
        """
        Initialize the ContractiveAutoEncoder class by specifying the
        number of visible units and the number of hidden units. The
        constructor also optionally receives symbolic variables for the
        input, the weights, and the biases.
        rng: np.random.RandomState used to initialize the weights
        X: symbolic description of the input, or None for a standalone encoder
        W, bhid, bvis: theano shared variables to be shared with another
            architecture; leave them as None for a standalone encoder
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        if W is None:
            W_bound = 4 * np.sqrt(6. / (n_hidden + n_visible))
            W = theano.shared(value = np.asarray(rng.uniform(
                                  low = -W_bound,
                                  high = W_bound,
                                  size = (n_visible, n_hidden)),
                                  dtype = theano.config.floatX),
                              name = 'W',
                              borrow = True)
        if bvis is None:
            bvis = theano.shared(value = np.zeros(n_visible,
                                     dtype = theano.config.floatX),
                                 borrow = True)
        if bhid is None:
            bhid = theano.shared(value = np.zeros(n_hidden,
                                     dtype = theano.config.floatX),
                                 borrow = True)
        self.W = W
        self.b = bhid
        self.b_prime = bvis
        ## tied weights: the decoder uses the transpose of the encoder weights
        self.W_prime = self.W.T
        self.X = X if X is not None else T.matrix(name = 'X')
        self.params = [self.W, self.b, self.b_prime]
    def get_hidden_values(self, X):
        return T.nnet.sigmoid(T.dot(X, self.W) + self.b)
    def get_jacobian(self, hidden, W):
        """
        Compute the Jacobian of the hidden layer with respect to the
        input; the reshapes are necessary to broadcast the element-wise
        product over the right axes.
        """
        reshaped_hidden = T.reshape(hidden * (1 - hidden),
            (self.batch_size, 1, self.n_hidden))
        reshaped_W = T.reshape(W, (1, self.n_visible, self.n_hidden))
        return reshaped_hidden * reshaped_W
    def get_reconstructed_input(self, hidden):
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
    def get_cost_updates(self, contraction_level, learning_rate):
        y = self.get_hidden_values(self.X)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)
        ## cross-entropy reconstruction cost, summed over the features of
        ## each datapoint; L_rec is a vector with one entry per example
        ## in the minibatch
        self.L_rec = - T.sum(self.X * T.log(z) + (1 - self.X) * T.log(1 - z),
                             axis = 1)
        ## squared Frobenius norm of the Jacobian, averaged over the minibatch
        self.L_jacob = T.sum(J ** 2) / self.batch_size
        ## total cost: mean reconstruction cost plus the contraction penalty
        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)
        gparams = T.grad(cost, self.params)
        updates = [(param, param - learning_rate * gparam)
                   for (param, gparam) in zip(self.params, gparams)]
        return (cost, updates)
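Why the two reshapes in get_jacobian give the right object: for h = sigmoid(xW + b) the partial derivative is dh_j/dx_i = h_j(1 - h_j) * W_ij, so broadcasting a (batch_size, 1, n_hidden) factor against a (1, n_visible, n_hidden) one yields one full (n_visible, n_hidden) Jacobian per example. A standalone numpy shape check (my own sketch, not part of the class):

## numpy sanity check of the broadcast used in get_jacobian
h = np.random.random((10, 500))          # (batch_size, n_hidden)
W_check = np.random.random((1875, 500))  # (n_visible, n_hidden)
J = (h * (1 - h)).reshape(10, 1, 500) * W_check.reshape(1, 1875, 500)
print J.shape  # (10, 1875, 500): one Jacobian per example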
def share_data(data, dtype = theano.config.floatX):
    ## store as floatX so the data can live on the GPU; hand back a cast
    ## view in the requested dtype
    shared_data = theano.shared(np.asarray(data, dtype = theano.config.floatX),
                                borrow = True)
    return T.cast(shared_data, dtype = dtype)
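share_data uses the shared-dataset trick from the deeplearning.net tutorials: the array is always *stored* as floatX (a requirement for GPU-resident shared variables at the time), and for integer labels T.cast hands back an int32 view of it. When the requested dtype is already floatX the cast is a no-op and the shared variable itself is returned, which is why run_cA below can still call get_value on the result.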
def run_cA(v_train_X, learning_rate = 0.01, n_epochs = 20,
           batch_size = 10, contraction_level = 0.1):
    n_train_batches = v_train_X.get_value(borrow = True).shape[0] / batch_size
    n_feats = v_train_X.get_value(borrow = True).shape[1]
    print 'building model'
    index = T.lscalar()
    x = T.matrix('x')
    rng = np.random.RandomState(0)
    ca = ContractiveAutoEncoder(rng = rng, X = x, n_visible = n_feats,
                                n_hidden = 500, batch_size = batch_size)
    cost, updates = ca.get_cost_updates(contraction_level, learning_rate)
    train_ca = theano.function(inputs = [index],
        outputs = [T.mean(ca.L_rec), ca.L_jacob],
        updates = updates,
        givens = {
            x: v_train_X[index * batch_size:(index + 1) * batch_size],
        })
    start_time = time.clock()
    print 'training model'
    for epoch in xrange(n_epochs):
        ## one (reconstruction cost, jacobian penalty) pair per minibatch
        c = [train_ca(i) for i in xrange(n_train_batches)]
        c_array = np.vstack(c)
        print c_array.shape
        print 'training epoch %d, reconstruction cost %f, jacobian norm %f' % (
            epoch, np.mean(c_array[:, 0]), np.mean(np.sqrt(c_array[:, 1])))
    end_time = time.clock()
import cPickle
from sklearn.cross_validation import train_test_split
#X, y = cPickle.load(open('data/digits.pkl', 'rb'))
X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))
y = y - 1  ## shift the 1-based labels to start at 0
print X.shape, y.shape
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.9)
print train_X.shape, train_y.shape
v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
v_train_y = share_data(train_y, dtype = 'int32')
v_validation_y = share_data(validation_y, dtype='int32')
run_cA(v_train_X)
(1000, 1875) (1000,)
(100, 1875) (100,)
building model
training model
(10, 2)
training epoch 0, reconstruction cost 424.612998, jacobian norm 23.137957
(10, 2)
training epoch 1, reconstruction cost -172.349470, jacobian norm 19.565774
(10, 2)
training epoch 2, reconstruction cost -416.215884, jacobian norm 16.351329
(10, 2)
training epoch 3, reconstruction cost -635.762077, jacobian norm 14.059150
(10, 2)
training epoch 4, reconstruction cost -846.087529, jacobian norm 12.458559
(10, 2)
training epoch 5, reconstruction cost -1052.016090, jacobian norm 11.318715
(10, 2)
training epoch 6, reconstruction cost -1256.081126, jacobian norm 10.447896
(10, 2)
training epoch 7, reconstruction cost -1459.182969, jacobian norm 9.759262
(10, 2)
training epoch 8, reconstruction cost -1661.702354, jacobian norm 9.216226
(10, 2)
training epoch 9, reconstruction cost -1863.872129, jacobian norm 8.774608
(10, 2)
training epoch 10, reconstruction cost -2065.774331, jacobian norm 8.418967
(10, 2)
training epoch 11, reconstruction cost -2267.598844, jacobian norm 8.129463
(10, 2)
training epoch 12, reconstruction cost -2469.441701, jacobian norm 7.887607
(10, 2)
training epoch 13, reconstruction cost -2671.284029, jacobian norm 7.679948
(10, 2)
training epoch 14, reconstruction cost -2873.096477, jacobian norm 7.500753
(10, 2)
training epoch 15, reconstruction cost -3074.903276, jacobian norm 7.346815
(10, 2)
training epoch 16, reconstruction cost -3276.734967, jacobian norm 7.213882
(10, 2)
training epoch 17, reconstruction cost -3478.606484, jacobian norm 7.100257
(10, 2)
training epoch 18, reconstruction cost -3680.525140, jacobian norm 7.005262
(10, 2)
training epoch 19, reconstruction cost -3882.509590, jacobian norm 6.924966
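The reconstruction cost turning negative from epoch 1 onward signals that the data violates an assumption of the cost: the cross-entropy in get_cost_updates is only a proper reconstruction cost when every input lies in [0, 1], and the blackbox features evidently do not. A hypothetical min-max rescaling before share_data would restore that assumption:

## hypothetical fix, not applied in the run above: squash the features
## into [0, 1] so the cross-entropy reconstruction cost stays non-negative
X = (X - X.min()) / (X.max() - X.min())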
*Stacked Denoising Auto Encoder*
import time
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theanoml.formula import share_data, LogisticRegressionFormula, HiddenLayerFormula
from theanoml.autoencoder import DenoisingAutoEncoderFormula
class SdA(object):
    """
    Stacked denoising auto-encoder class.
    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer i becomes the input of the dA
    at layer i+1. The first-layer dA gets the input of the SdA as its
    input, and the hidden layer of the last dA represents the output. Note
    that after pretraining the SdA is treated as a normal MLP; the dAs are
    only used to initialize the weights.
    """
    def __init__(self, n_in, n_hiddens, n_out, corruption_levels = [0.1, 0.1]):
        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(n_hiddens)
        theano_rng = RandomStreams(np.random.randint(2 ** 30))
        self.x = T.matrix('x')
        self.y = T.ivector('y')
        ## TODO: build the stacked sigmoid layers and their companion dAs
        ## (see the plain-Theano sketch after this class)
    def pretraining_functions(self, X, batch_size):
        ## TODO: one theano.function per dA layer
        pass
    def build_finetune_functions(self, train_X, train_y,
                                 validation_X, validation_y, batch_size,
                                 learning_rate):
        ## TODO: supervised fine-tuning of the whole stack as an MLP
        pass
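The constructor body is still a TODO. I don't have the signatures of theanoml's HiddenLayerFormula and DenoisingAutoEncoderFormula, so here is a self-contained plain-Theano sketch of the wiring a tutorial-style SdA constructor performs: each sigmoid layer feeds the next, and each dA would reuse its layer's W and b so that pretraining initializes the MLP weights directly.

## plain-Theano sketch of the stacking wiring (stands in for the missing
## __init__ body; names and structure are my assumptions, modeled on the
## deeplearning.net SdA tutorial)
def stack_sigmoid_layers(x, n_in, n_hiddens, rng):
    layers = []
    layer_input, input_size = x, n_in
    for n_out in n_hiddens:
        bound = np.sqrt(6. / (input_size + n_out))
        W = theano.shared(np.asarray(
                rng.uniform(low = -bound, high = bound,
                            size = (input_size, n_out)),
                dtype = theano.config.floatX), borrow = True)
        b = theano.shared(np.zeros(n_out, dtype = theano.config.floatX),
                          borrow = True)
        output = T.nnet.sigmoid(T.dot(layer_input, W) + b)
        ## a companion dA for this layer would receive layer_input as its
        ## X and share this W and b, exactly like the cA class above
        layers.append((layer_input, W, b, output))
        layer_input, input_size = output, n_out
    return layers

## usage: stack_sigmoid_layers(self.x, n_in, n_hiddens, np.random.RandomState(0))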
def run_sda(v_train_X, v_train_y, v_validation_X, v_validation_y,
            finetune_lr = 0.1, pretraining_epochs = 15,
            pretrain_lr = 0.001, training_epochs = 1000,
            batch_size = 5):
    n_samples, n_feats = v_train_X.get_value(borrow = True).shape
    n_train_batches = n_samples / batch_size
    print 'building the model'
    sda = SdA(n_in = n_feats, n_hiddens = [1000, 1000, 1000], n_out = 10)
    print 'getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(X = v_train_X, batch_size = batch_size)
    print 'pretraining the model'
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        for epoch in xrange(pretraining_epochs):
            cost = np.mean([pretraining_fns[i](
                                index = bindex,
                                corruption = corruption_levels[i],
                                lr = pretrain_lr)
                            for bindex in xrange(n_train_batches)])
            print 'pre-training layer %i, epoch %d, cost %f' % (i, epoch, cost)
    print 'getting the finetuning functions'
    train_fn, validate_model = sda.build_finetune_functions(
        v_train_X, v_train_y, v_validation_X, v_validation_y,
        batch_size = batch_size, learning_rate = finetune_lr
    )
    print 'finetuning the model'
    ## early stopping: look at this many minibatches regardless
    patience = 10 * n_train_batches
    ## wait at least this factor longer once a new best is found
    patience_increase = 2
    ## a relative improvement of this much counts as significant
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)
    best_params = None
    best_validation_loss = np.inf
    done_looping = False
    epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(validate_model())
                print 'epoch %i, minibatch %i/%i, validation error %f %%' % (
                    epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.)
                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_params = sda.params
            if patience <= iter:
                done_looping = True
                break
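For the data sizes used below, the early-stopping numbers work out as follows: the training split has 100 samples, so with batch_size = 5 there are 20 minibatches per epoch, patience starts at 200 iterations, and the validation loss is checked once per epoch. Any validation loss below 0.995 times the best seen so far extends patience to twice the current iteration count, so training continues as long as meaningful improvements keep arriving.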
import cPickle
from sklearn.cross_validation import train_test_split
#X, y = cPickle.load(open('data/digits.pkl', 'rb'))
X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))
y = y - 1
print X.shape, y.shape
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.9)
print train_X.shape, train_y.shape
v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
v_train_y = share_data(train_y, dtype = 'int32')
v_validation_y = share_data(validation_y, dtype='int32')
run_sda(v_train_X, v_train_y, v_validation_X, v_validation_y)
(1000, 1875) (1000,)
(100, 1875) (100,)
building the model
getting the pretraining functions
pretraining the model

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-8-08f4fe4fe846> in <module>()
     10 v_train_y = share_data(train_y, dtype = 'int32')
     11 v_validation_y = share_data(validation_y, dtype='int32')
---> 12 run_sda(v_train_X, v_train_y, v_validation_X, v_validation_y)

<ipython-input-7-05eff9d23eff> in run_sda(v_train_X, v_train_y, v_validation_X,
    v_validation_y, finetune_lr, pretraining_epochs, pretrain_lr,
    training_epochs, batch_size)
     30 print 'pretraining the model'
     31 corruption_levels = [.1, .2, .3]
---> 32 for i in xrange(sda.n_layers):
     33     for epoch in xrange(pretraining_epochs):
     34         cost = np.mean([pretraining_fns[i](

AttributeError: 'SdA' object has no attribute 'n_layers'
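The AttributeError is a stale-cell artifact rather than a bug in the code shown: the SdA constructor above does set self.n_layers, so this run evidently used an earlier revision of the class. Re-running the class definition cell would get past this line, but only as far as pretraining_fns[i], since pretraining_functions is still an unimplemented stub.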