import theano.tensor as T
from theano import function, shared
from theano import pp
import theano
import numpy as np
from pprint import pprint
*Basics of Tensor Functionality*
## create symbols of variables and functions
## 1. use T.xx to directly create variables
## 2. use operators (such as +) to create new variables - TensorVariable
## 3. use function to create function symbols
x = T.dscalar('x') ## ALL SYMBOLS must be typed, T.dscalar = 0-d arrays (scalar) of doubles (d)
print type(x) ## dscalar is similar to dtype in np, so x is not an instance of dscalar, but a TensorVariable
y = T.dscalar('y') ## T.dscalar() is like a factory method
print y.type is T.dscalar ## the "dtype" of a TensorVariable is accessed by 'type' attr
z = x + y
print type(z), pp(z) ## pp can be used to pretty-print variables, but not functions
f = function([x, y], z) ## compiling function object to C code, f CAN be used like a normal python function
print type(f), f ## cannot use pp for f
print f(10.1, 100)
print z.eval({x: 10.1, y: 100}) ## z's eval() is equivalent to function(), but less flexible
<class 'theano.tensor.basic.TensorVariable'>
True
<class 'theano.tensor.basic.TensorVariable'> (x + y)
<class 'theano.compile.function_module.Function'> <theano.compile.function_module.Function object at 0x10b918bd0>
110.1
110.1
## every symbol in theano must be typed, which can be done by using specific factory method in T
## matrices algebra
x = T.dmatrix('x') # matrix of doubles - inputs are DUCK-typed (np.array or list of lists); f RETURNS np.array
print type(x), x.type, pp(x)
y = T.dmatrix('y')
z = x * y ## elementwise multiplication
f = function([x, y], z)
## perform f on list of list
r = f([[1, 2], [3, 4]], [[10, 20], [30, 40]])
print type(r)
print r
## perform f again on np.array
r = f(np.asarray([[1, 2], [3, 4]]), np.asarray([[10, 20], [30, 40]]))
print type(r)
print r
<class 'theano.tensor.basic.TensorVariable'> TensorType(float64, matrix) x
<type 'numpy.ndarray'>
[[  10.   40.]
 [  90.  160.]]
<type 'numpy.ndarray'>
[[  10.   40.]
 [  90.  160.]]
*Theano symbol typing system*
The type system covers: scalar, vector, matrix, row, col, tensor3, tensor4
They can be created with the type-specific factory methods (e.g. T.dscalar, T.fmatrix), or with the generic factory methods, such as tensor.scalar(name, dtype=config.floatX)
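A quick sketch mixing the type-specific and generic factories (names here are arbitrary):
i = T.iscalar('i')                   # int32 scalar
v = T.fvector('v')                   # float32 vector
m = T.matrix('m', dtype='float64')   # generic factory with an explicit dtype
t3 = T.tensor3('t3')                 # 3-d tensor, dtype defaults to config.floatX
print i.type, v.type, m.type, t3.type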
## shared TensorVariable
## Shared TensorVariables are usually used to wrap existing python objects as symbolic variables
## they are a different kind of beast, which is why shared lives directly in the theano package
## instead of in theano.tensor
x = shared(np.random.randn(3, 4))
print type(x), pp(x), x.type
print x.value ## NOTE: prints a property object here (see output) - prefer x.get_value()
print T.shape(x)
## use eval to get the value in the shared variable
print x.eval()
## the created variable is a TensorSharedVariable; read its contents via get_value() or eval()
## the dtype of x is inferred from the ndarray
<class 'theano.tensor.sharedvar.TensorSharedVariable'> <TensorType(float64, matrix)> TensorType(float64, matrix)
(<property object at 0x109aa5e68>,)
Shape.0
[[ 0.97698495 -1.53368543  0.92842832  0.64082965]
 [-1.12715379  0.0224872   0.49577723 -0.99701666]
 [ 0.14669318  1.45799837 -0.38410862  0.23779664]]
By default (borrow=False) theano.shared COPIES its argument, so subsequent changes to the original ndarray are NOT reflected in the shared variable; pass borrow=True to alias the caller's buffer instead.
Likewise, theano makes a copy of any ndarray that you use directly in an expression, so subsequent changes to that ndarray will not have any effect on the theano expression.
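A minimal sketch of the borrow semantics (assuming the default borrow=False copy behavior):
a = np.ones(3)
s_copy = shared(a)               # borrow=False is the default: internal buffer is a copy
s_view = shared(a, borrow=True)  # may alias the caller's ndarray
a[0] = 99.
print s_copy.get_value()         # unchanged: [ 1.  1.  1.]
print s_view.get_value()         # reflects the change: [ 99.   1.   1.]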
*Types of TensorVariables*
*TensorVariable Operation*
The tensor package has different families of operators that combine variables into new TensorVariables, such as elementwise arithmetic (+, -, *, **), comparisons (T.lt, T.eq), reductions (T.sum, T.mean), and linear algebra (T.dot).
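A short sketch: each of these operator families yields a new TensorVariable:
a, b = T.dmatrices('a', 'b')
for expr in [a + b, a * b, T.dot(a, b), T.lt(a, b), T.sum(a), T.exp(a)]:
    print type(expr), expr.type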
*All about defining functions*
## define function that returns MULTIPLE OUTPUT
a, b = T.dmatrices('a', 'b')
diff = a - b ## - operator
abs_diff = abs(a-b) ## abs operator
diff_squared = diff ** 2 ## ** operator
f = function([a, b], [diff, abs_diff, diff_squared])
pprint(f([[0, 1], [-1, -2]], [[1, 1], [-1, -2]]))
[array([[-1.,  0.],
        [ 0.,  0.]]),
 array([[ 1.,  0.],
        [ 0.,  0.]]),
 array([[ 1.,  0.],
        [ 0.,  0.]])]
## setting a DEFAULT value for an argument
## Use theano.Param to WRAP a variable and a default value
from theano import Param # Param, Shared are in theano package
## The Param class allows you to specify properties of your function
## parameters in greater detail, such as mutability and default values.
## Inputs with default values must follow inputs without default values.
## Param even allows overriding a variable's name for the specific function.
x, y = T.dmatrices('x', 'y')
z = x + y
f = function([x, Param(y, default=[[1, 2]], name='y_by_name')], z)
pprint(f([[2, 3]]))
pprint(f([[2, 3]], y_by_name=[[0, 0]])) # use the override name to specify named params
pprint(f([[2, 3]], [[0, 0]]))
array([[ 3.,  5.]])
array([[ 2.,  3.]])
array([[ 2.,  3.]])
## Write a function with internal state - using Shared variable
## example: accumulator
from theano import shared
state = shared(0)
inc = T.iscalar('inc')
accumulator = function([inc], state, updates = [(state, state+inc)]) # inputs, outputs, updates
print 'shared state:', state.get_value()
print accumulator(1)
print 'shared state:', state.get_value()
print accumulator(10)
state.set_value(0) ## reset
print 'shared state:', state.get_value()
print accumulator(1)
## Shared variables are hybrid symbolic and non-symbolic variables whose
## value may be shared between multiple functions.
## Shared variables can be used in symbolic expressions just like the objects
## returned by dmatrices(...) but they also have an internal value that defines
## the value taken by this symbolic variable in ALL the functions that use it.
## The value stored in the shared variable can be accessed and modified by the
## .get_value() and .set_value() methods.
## the updates parameter of a Function is a list of pairs of the form
## (shared_variable, new_expression) or a dictionary
## The main reason for using shared variables is efficiency (e.g. in-place updates).
shared state: 0
0
shared state: 1
1
shared state: 0
0
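The dictionary form of updates works too, and several functions can share one state - a small sketch:
state2 = shared(0)
inc2 = T.iscalar('inc2')
acc = function([inc2], state2, updates = {state2: state2 + inc2})
dec = function([inc2], state2, updates = {state2: state2 - inc2})
acc(5); dec(2)
print state2.get_value()  # 3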
## Introducing RANDOMNESS in functions
## In theano a random number generator is implemented as a Random Stream object
## Random Streams are at their core SHARED variables.
## Theano random objects are defined and implemented in RandomStreams and, at a lower level,
## RandomStreamsBase.
## MORE ON RANDOMNESS http://deeplearning.net/software/theano/tutorial/examples.html
from theano.tensor.shared_randomstreams import RandomStreams # REMEMBER THIS!
## RANDOM STREAM
srng = RandomStreams(seed = 0) ## random generator (stream)
print type(srng)
## RANDOM VARIABLES
rv_u = srng.uniform((2, 2)) # random variable, which will update rv_u.rng state every time it is drawn
rv_n = srng.normal((2, 2))
print type(rv_u), rv_u.type, rv_u.rng
f = function([], rv_u) ## update srng every time - different values every time
g = function([], rv_n, no_default_updates=True) # NOT UPDATING srng - same value every time
print f()
print f()
print g()
print g()
<class 'theano.tensor.shared_randomstreams.RandomStreams'>
<class 'theano.tensor.basic.TensorVariable'> TensorType(float64, matrix) <RandomStateType>
[[ 0.48604732  0.68571232]
 [ 0.98557605  0.19559641]]
[[ 0.58341167  0.98058218]
 [ 0.1804803   0.70146864]]
[[ 1.99759307  0.35128336]
 [ 1.50384112  1.25808594]]
[[ 1.99759307  0.35128336]
 [ 1.50384112  1.25808594]]
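Individual streams can also be re-seeded; a sketch along the lines of the tutorial's seeding example (the seed value is arbitrary):
rng_val = rv_u.rng.get_value(borrow=True)  # the numpy RandomState behind rv_u
rng_val.seed(89234)
rv_u.rng.set_value(rng_val, borrow=True)   # subsequent f() calls restart the stream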
*Example of Logistic Regression*
## Computing logistic function - elementwise way
## definition 1: logistic(x) = 1. / (1. + exp(-x))
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
logistic = function([x], s)
print logistic([[0, 1], [-1, -2]])
## definition 2: logistic(x) = (1. + tanh(x/2)) / 2
s2 = (1 + T.tanh(x/2)) / 2
logistic2 = function([x], s2)
print logistic2([[0, 1], [-1, -2]])
[[ 0.5         0.73105858]
 [ 0.26894142  0.11920292]]
[[ 0.5         0.73105858]
 [ 0.26894142  0.11920292]]
## Logistic Regression
import numpy as np
import theano
import theano.tensor as T
rng = np.random # random number generator
## DATA
N = 400
feats = 1000
data_X, data_y = (rng.randn(N, feats), rng.randint(size = N, low = 0, high = 2))
training_steps = 10000
print data_X.shape, data_y.shape
print np.unique(data_y)
## theano symbolic variables
x = T.dmatrix('x')
y = T.dvector('y') ## y is A VECTOR!!!
w = theano.shared(rng.randn(feats), name='w')
b = theano.shared(0., name='b')
#print 'initial model:'
#print w.get_value(), b.get_value()
## theano expression graph
p_1 = 1 / (1 + T.exp(- T.dot(x, w) - b))
prediction = p_1 > 0.5
x_ent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)
cost = T.mean(x_ent) + 0.01 * T.sum(abs(w)) # L1 penalty here; the tutorial's variant is L2: xent.mean() + 0.01 * (w ** 2).sum()
accuracy = T.mean(T.eq(prediction, y))
gw, gb = T.grad(cost, [w, b])
## compile - shared w, b for train and predict function
train = theano.function(
inputs = [x, y],
outputs = [prediction, x_ent],
updates = {w: w - 0.1*gw, b: b-0.1*gb})
predict = theano.function(inputs = [x], outputs = prediction)
score = theano.function(inputs = [x, y], outputs = accuracy)
## train
for i in xrange(training_steps):
if i % 1000 == 0:
print 'iteration ', i
pred, err = train(data_X, data_y)
## predict
print 'performance on D:', score(data_X, data_y)
print 'significant number of features:', sum(np.abs(w.get_value()) > 0)
(400, 1000) (400,)
[0 1]
iteration 0
iteration 1000
iteration 2000
iteration 3000
iteration 4000
iteration 5000
iteration 6000
iteration 7000
iteration 8000
iteration 9000
performance on D: 1.0
significant number of features: 1000
## Wrap theano logistic regression as sklearn model
from sklearn.base import BaseEstimator
import numpy as np
import theano
import theano.tensor as T
class LogisticRegression(BaseEstimator):
    def __init__(self, alpha = 0.01, n_iters = 10000, learning_rate = 0.1):
        ## meta params
        self.rng = np.random
        self.alpha = alpha
        self.n_iters = n_iters
        self.learning_rate = learning_rate
        ## independent symbols
        X = T.dmatrix('X')
        y = T.dvector('y')
        self.w = theano.shared(self.rng.randn(1), 'w') # we don't know the dim yet
        self.b = theano.shared(0., 'b')
## dependent expressions
p_1 = 1 / (1 + T.exp(- T.dot(X, self.w) - self.b))
prediction = p_1 > 0.5
x_ent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)
cost = T.mean(x_ent) + self.alpha * T.sum(self.w**2)
accuracy = T.mean(T.eq(y, prediction))
gw, gb = T.grad(cost, [self.w, self.b])
        ## compiled functions (underscore names so the sklearn-style methods below can delegate)
        self._train = theano.function(
            inputs = [X, y],
            outputs = [prediction, x_ent],
            updates = ((self.w, self.w - self.learning_rate * gw),
                       (self.b, self.b - self.learning_rate * gb)))
        self._predict = theano.function(
            inputs = [X],
            outputs = prediction)
        self._predict_prob = theano.function(
            inputs = [X],
            outputs = p_1)
        self._score = theano.function(
            inputs = [X, y],
            outputs = accuracy)
    def fit(self, X, y):
        ## initialize w and b values
        n_samples, n_feats = X.shape
        self.w.set_value(self.rng.randn(n_feats))
        ## train on self.w and self.b
        for i in xrange(self.n_iters):
            if i % 1000 == 0:
                print 'iteration:', i
            self._train(X, y)
        return self
    def predict(self, X):
        return self._predict(X)
    def predict_prob(self, X):
        return self._predict_prob(X)
    def score(self, X, y):
        return self._score(X, y)
import cPickle
from sklearn.cross_validation import train_test_split
data_X, data_y = cPickle.load(open('data/blackbox.pkl', 'rb'))
data_y[data_y != 1] = 0 # binary classification
train_X, test_X, train_y, test_y = train_test_split(data_X, data_y, test_size = 0.2)
lr = LogisticRegression()
lr.fit(train_X, train_y)
print lr.score(test_X, test_y)
iteration: 0
iteration: 1000
iteration: 2000
iteration: 3000
iteration: 4000
iteration: 5000
iteration: 6000
iteration: 7000
iteration: 8000
iteration: 9000
0.77
## parallel run
from IPython.parallel import Client
client = Client()
dv = client[:]
print 'running on ', len(dv)
dv.block = True
dv['train_X'] = train_X
dv['train_y'] = train_y
running on 4
%%px
%load 80
lr = LogisticRegression()
lr.fit(train_X, train_y)
print lr.score(train_X, train_y)
iteration: 0
iteration: 1000
iteration: 2000
iteration: 3000
iteration: 4000
iteration: 5000
iteration: 6000
iteration: 7000
iteration: 8000
iteration: 9000
0.81625
*Flow of Control*
## CONDITIONING
## two main ops: theano.ifelse.ifelse or T.switch
## ifelse => takes a boolean condition and two variables; lazily evaluates only the returned variable
## switch => takes a tensor as condition and two variables; it is elementwise and more general
## NOTE: unless linker='vm' or linker='cvm' is used,
## ifelse will compute both variables and take the same computation time as switch.
## Although the linker is not currently set to cvm by default, it will be in the near future
from theano.ifelse import ifelse
import time
a, b = T.scalars('a', 'b')
x, y = T.matrices('x', 'y')
z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y)) # evaluate both means of x and y
z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y)) # evaluate one of them
f_switch = theano.function([a, b, x, y], z_switch, mode = theano.Mode(linker = 'vm'))
f_lazyifelse = theano.function([a, b, x, y], z_lazy, mode = theano.Mode(linker = 'vm'))
val1, val2 = 0., 1.
big_mat1, big_mat2 = np.ones((10000, 1000)), np.ones((10000, 1000))
%timeit f_switch(val1, val2, big_mat1, big_mat2)
%timeit f_lazyifelse(val1, val2, big_mat1, big_mat2)
10 loops, best of 3: 22.2 ms per loop
100 loops, best of 3: 10.9 ms per loop
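Unlike ifelse, switch evaluates elementwise over tensors - a minimal sketch:
v = T.dvector('v')
clip_neg = theano.function([v], T.switch(T.lt(v, 0), 0., v))  # elementwise condition
print clip_neg([-1., 2., -3.])  # [ 0.  2.  0.]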
## LOOPS
## The most general way of doing loop in theano is through the scan op
## Both reduction and map can be viewed as special cases of scan
## How it works:
## scan applies a function along some input sequence, producing an output at each time-step
## the function can see the previous K time-steps of its output
## Unchanging variables are passed to scan as non_sequences. Initialization occurs
## in outputs_info, and the accumulation happens automatically.
## The general order of arguments to scan's fn parameter is:
## sequences (if any), prior result(s) (if needed), non-sequences (if any)
## ELEMENTWISE POWER
k = T.iscalar('k')
A = T.vector('A') ## float
def inner_fct(prior_result, B):
return prior_result * B
## Symbolic description of the result - scan returns the result sequence plus an UPDATES dict
result, updates = theano.scan(fn = inner_fct, outputs_info=T.ones_like(A),
non_sequences=A, n_steps = k)
## Scan has provided us with A ** 1 through A ** k. Keep only the last
## value. Scan notices this and does not waste memory saving them.
final_result = result[-1]
power = theano.function(inputs = [A, k], outputs = final_result, updates = updates)
print power(range(10), 2)
## POLYNOMIAL
coefficients = T.vector('coefficients')
x = T.scalar('x')
## symbolic representation of polynomial components and updates
## Be careful with the initial prior: its dtype needs to be
## coefficients.dtype, otherwise a downcasting error will occur
results, updates = theano.scan(fn = lambda coef, power, prior, x: (power+1, prior+coef*(x**power)),
                               outputs_info = [0., T.as_tensor_variable(np.asarray(0., coefficients.dtype))],
                               #T.constant(0.0, dtype=coefficients.dtype)],
                               sequences = coefficients,
                               non_sequences = x)
result = results[1][-1] # results = [seq(power), seq(sum_of_components)]
poly = theano.function(inputs = [coefficients, x], outputs = result, updates = updates)
print poly([1., 2, 3], 2)
print sum([2 ** i * c for (i, c) in enumerate([1, 2, 3])])
[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]
17.0
17
## YET ANOTHER POLYNOMIAL
power_coeff_pairs = T.matrix('power_coeff_pairs')
x = T.scalar('x')
results, updates = theano.scan(fn = lambda power_coeff, x: power_coeff[1] * (x ** power_coeff[0]),
outputs_info = None,
sequences = power_coeff_pairs,
non_sequences = x)
result = T.sum(results)
poly = theano.function(inputs = [power_coeff_pairs, x], outputs = result, updates = updates)
print poly(list(enumerate([1, 2, 3])), 2)
17.0
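Since map and reduce are special cases of scan, theano also ships thin wrappers for them; a sketch assuming theano.map and theano.reduce:
v = T.dvector('v')
squared, upd_m = theano.map(fn = lambda e: e ** 2, sequences = v)
total, upd_r = theano.reduce(fn = lambda e, acc: acc + e,
                             sequences = v,
                             outputs_info = T.as_tensor_variable(np.asarray(0., 'float64')))
f_map = theano.function([v], squared, updates = upd_m)
f_reduce = theano.function([v], total, updates = upd_r)
print f_map([1., 2., 3.])     # [ 1.  4.  9.]
print f_reduce([1., 2., 3.])  # 6.0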
T.arange(1, 10).eval()
T.as_tensor_variable?
*Sparse Matrices in Theano*
## Theano sparse matrices are based on scipy sparse package,
## it currently supports two types, namely csc and csr formats, for fast linear algebra
## A general rule of choosing between csr and csc formats is:
## If shape[0] > shape[1], use csr format. Otherwise, use csc.
## ANOTHER ONE is: Use the format compatible with the ops in your computation graph.
import scipy.sparse as sp
from theano import sparse
print sparse.all_dtypes
## MOVE FROM and TO dense matrices
x = sparse.csc_matrix('x')
print x.type
print sparse.dense_from_sparse(x).type
print sparse.csr_from_dense(sparse.dense_from_sparse(x)).type
set(['uint64', 'int32', 'int16', 'complex128', 'complex64', 'float64', 'uint8', 'uint32', 'uint16', 'int64', 'int8', 'float32'])
Sparse[float64, csc]
TensorType(float64, matrix)
Sparse[float64, csr]
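A small sketch of mixing sparse and dense operands (assuming sparse.dot, which yields a dense result here):
xs = sparse.csc_matrix('xs', dtype='float64')
d = T.dmatrix('d')
f = theano.function([xs, d], sparse.dot(xs, d))  # sparse x dense -> dense
print f(sp.csc_matrix(np.asarray([[1., 0.], [0., 2.]])), np.ones((2, 2)))
## -> [[ 1.  1.]
##     [ 2.  2.]]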
*Logistic Regression for Multiple Classification with SGD*
## based on tutorials at
## https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/logistic_sgd.py
import numpy as np
import theano
import theano.tensor as T
import time
class LogisticRegression(object):
"""
    It looks like a pure symbolic model, without any real data
    involved.
    IS IT A GOOD PRACTICE IN THEANO TO SEPARATE SYMBOLIC REPRESENTATION FROM REAL DATA??
    AND EVEN FURTHER SEPARATED FROM THE OPTIMIZER MODEL
It plays the role of backend support - creating functions that can be directly applied
onto data
"""
def __init__(self, input, n_in, n_out):
## symbolic variables
self.W = theano.shared(value = np.zeros((n_in, n_out)), name = 'W', borrow = True)
self.b = theano.shared(value = np.zeros((n_out,)), name = 'b', borrow = True)
## expressions
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
self.params = [self.W, self.b]
def negative_log_likelihood(self, y):
"""
        y: theano.tensor.TensorType (int vector of class labels)
"""
        ## advanced indexing: pick log P(y_i | x_i) for each sample i
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
def errors(self, y):
return T.mean(T.neq(y, self.y_pred))
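The fancy indexing used in negative_log_likelihood is worth a tiny standalone sketch (the matrix and labels below are made up):
lp = T.dmatrix('lp')   # rows = samples, columns = per-class log-probabilities
lbl = T.ivector('lbl')
pick = theano.function([lp, lbl], lp[T.arange(lbl.shape[0]), lbl])
print pick(np.log([[0.1, 0.9], [0.8, 0.2]]), np.asarray([1, 0], dtype='int32'))
## -> [log(0.9), log(0.8)], i.e. one log-probability per sample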
def shared_dataset(data_xy, borrow = True):
"""
Function that loads the dataset into shared variables.
The reason we store our dataset in shared variables is to allow
Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch every time
    one is needed (the default behavior if the data is not in a shared variable)
    would lead to a large decrease in performance.
"""
data_x, data_y = data_xy
shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
borrow = borrow)
shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
borrow = borrow)
## when storing data on the GPU it has to be stored as floats
## therefore we will store the labels as 'floatX' as well. But
## during our computations we need them as ints (we use labels as index).
    ## therefore instead of returning shared_y we will have to cast it to int.
## this little hack lets us get around this issue
return shared_x, T.cast(shared_y, 'int32')
def sgd_optimize(train_data, test_data,
learning_rate = 0.13, n_epochs = 1000, batch_size = 600):
"""
    Demonstrate stochastic gradient descent optimization for a log-linear model
"""
train_X, train_y = train_data
test_X, test_y = test_data
n_samples, n_feats = train_X.shape.eval()
n_train_batches = train_X.get_value(borrow = True).shape[0] / batch_size
n_test_batches = test_X.get_value(borrow = True).shape[0] / batch_size
print 'building the model ...'
index = T.lscalar() # index of minibatch
XX = T.matrix('X')
yy = T.ivector('y')
classifier = LogisticRegression(input = XX, n_in = n_feats, n_out = 10)
cost = classifier.negative_log_likelihood(yy)
test_model = theano.function(inputs = [index],
outputs = classifier.errors(yy),
givens = {
XX: test_X[index * batch_size: (index+1) * batch_size],
yy: test_y[index * batch_size: (index+1) * batch_size]
})
g_W = T.grad(cost = cost, wrt = classifier.W)
g_b = T.grad(cost = cost, wrt = classifier.b)
updates = [(classifier.W, classifier.W - learning_rate * g_W),
(classifier.b, classifier.b - learning_rate * g_b)]
train_model = theano.function(inputs = [index],
outputs=cost,
updates = updates,
givens = {
XX: train_X[index * batch_size: (index+1) * batch_size],
yy: train_y[index * batch_size: (index+1) * batch_size]
})
print '... training the model'
## early stopping parameters
    ## look at this many examples regardless
patience = 5000
## wait this much longer when a new best is found
patience_increase = 2
    ## a relative improvement of this much is considered significant
improvement_threshold = 0.995
## go through this many minibatches before checking the network
## on the test set; in this case we check every epoch
validation_frequency = min(n_train_batches, patience/2)
best_params = None
best_validation_loss = np.inf
test_score = 0.
start_time = time.clock()
done_looping = False
epoch = 0
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
## iteration number
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
                ## do the validation - compute zero-one loss (here the test set doubles as the validation set)
validation_losses = [test_model(i) for i in xrange(n_test_batches)]
this_validation_loss = np.mean(validation_losses)
print('epoch %i, minibatch %i/%i, validation error %f %%' % (
epoch, minibatch_index+1, n_train_batches, this_validation_loss*100.
))
## if we got the best validation score until now
if this_validation_loss < best_validation_loss:
## improve patience if loss improvement is good enough
if this_validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, iter * patience_increase)
best_validation_loss = this_validation_loss
if patience <= iter:
done_looping = True
break
end_time = time.clock()
## load data
import cPickle
from sklearn.cross_validation import train_test_split
X, y = cPickle.load(open('data/digits.pkl', 'rb'))
print X.shape, y.shape
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.2)
(train_X, train_y) = shared_dataset((train_X, train_y))
(test_X, test_y) = shared_dataset((test_X, test_y))
sgd_optimize((train_X, train_y), (test_X, test_y))
(42000, 784) (42000,)
building the model ...
... training the model
epoch 1, minibatch 56/56, validation error 17.130952 %
epoch 2, minibatch 56/56, validation error 18.250000 %
epoch 3, minibatch 56/56, validation error 11.464286 %
epoch 4, minibatch 56/56, validation error 31.107143 %
epoch 5, minibatch 56/56, validation error 21.357143 %
epoch 6, minibatch 56/56, validation error 19.202381 %
epoch 7, minibatch 56/56, validation error 21.726190 %
epoch 8, minibatch 56/56, validation error 30.107143 %
epoch 9, minibatch 56/56, validation error 18.869048 %
epoch 10, minibatch 56/56, validation error 27.523810 %
epoch 11, minibatch 56/56, validation error 9.321429 %
epoch 12, minibatch 56/56, validation error 19.702381 %
epoch 13, minibatch 56/56, validation error 9.523810 %
epoch 14, minibatch 56/56, validation error 19.619048 %
epoch 15, minibatch 56/56, validation error 9.357143 %
epoch 16, minibatch 56/56, validation error 29.011905 %
epoch 17, minibatch 56/56, validation error 9.035714 %
epoch 18, minibatch 56/56, validation error 20.071429 %
epoch 19, minibatch 56/56, validation error 19.154762 %
epoch 20, minibatch 56/56, validation error 9.190476 %
epoch 21, minibatch 56/56, validation error 8.571429 %
epoch 22, minibatch 56/56, validation error 9.404762 %
epoch 23, minibatch 56/56, validation error 21.809524 %
epoch 24, minibatch 56/56, validation error 19.154762 %
epoch 25, minibatch 56/56, validation error 8.952381 %
epoch 26, minibatch 56/56, validation error 21.166667 %
epoch 27, minibatch 56/56, validation error 8.785714 %
epoch 28, minibatch 56/56, validation error 8.797619 %
epoch 29, minibatch 56/56, validation error 10.869048 %
epoch 30, minibatch 56/56, validation error 10.202381 %
epoch 31, minibatch 56/56, validation error 19.821429 %
epoch 32, minibatch 56/56, validation error 29.154762 %
epoch 33, minibatch 56/56, validation error 10.988095 %
epoch 34, minibatch 56/56, validation error 10.428571 %
epoch 35, minibatch 56/56, validation error 18.535714 %
epoch 36, minibatch 56/56, validation error 9.488095 %
epoch 37, minibatch 56/56, validation error 11.035714 %
epoch 38, minibatch 56/56, validation error 18.333333 %
epoch 39, minibatch 56/56, validation error 8.916667 %
epoch 40, minibatch 56/56, validation error 9.119048 %
epoch 41, minibatch 56/56, validation error 11.880952 %
epoch 42, minibatch 56/56, validation error 19.142857 %
epoch 43, minibatch 56/56, validation error 20.809524 %
epoch 44, minibatch 56/56, validation error 20.011905 %
epoch 45, minibatch 56/56, validation error 9.166667 %
epoch 46, minibatch 56/56, validation error 8.797619 %
epoch 47, minibatch 56/56, validation error 19.285714 %
epoch 48, minibatch 56/56, validation error 9.892857 %
epoch 49, minibatch 56/56, validation error 9.119048 %
epoch 50, minibatch 56/56, validation error 21.952381 %
epoch 51, minibatch 56/56, validation error 9.261905 %
epoch 52, minibatch 56/56, validation error 9.976190 %
epoch 53, minibatch 56/56, validation error 9.333333 %
epoch 54, minibatch 56/56, validation error 9.726190 %
epoch 55, minibatch 56/56, validation error 9.821429 %
epoch 56, minibatch 56/56, validation error 22.488095 %
epoch 57, minibatch 56/56, validation error 9.238095 %
epoch 58, minibatch 56/56, validation error 12.476190 %
epoch 59, minibatch 56/56, validation error 9.095238 %
epoch 60, minibatch 56/56, validation error 9.190476 %
epoch 61, minibatch 56/56, validation error 9.261905 %
epoch 62, minibatch 56/56, validation error 8.833333 %
epoch 63, minibatch 56/56, validation error 9.750000 %
epoch 64, minibatch 56/56, validation error 19.130952 %
epoch 65, minibatch 56/56, validation error 19.095238 %
epoch 66, minibatch 56/56, validation error 17.273810 %
epoch 67, minibatch 56/56, validation error 9.202381 %
epoch 68, minibatch 56/56, validation error 11.476190 %
epoch 69, minibatch 56/56, validation error 10.035714 %
epoch 70, minibatch 56/56, validation error 9.333333 %
epoch 71, minibatch 56/56, validation error 9.297619 %
epoch 72, minibatch 56/56, validation error 8.785714 %
epoch 73, minibatch 56/56, validation error 10.773810 %
epoch 74, minibatch 56/56, validation error 15.642857 %
epoch 75, minibatch 56/56, validation error 8.940476 %
epoch 76, minibatch 56/56, validation error 9.380952 %
epoch 77, minibatch 56/56, validation error 9.595238 %
epoch 78, minibatch 56/56, validation error 8.761905 %
epoch 79, minibatch 56/56, validation error 20.011905 %
epoch 80, minibatch 56/56, validation error 9.416667 %
epoch 81, minibatch 56/56, validation error 21.571429 %
epoch 82, minibatch 56/56, validation error 19.333333 %
epoch 83, minibatch 56/56, validation error 9.023810 %
epoch 84, minibatch 56/56, validation error 10.452381 %
epoch 85, minibatch 56/56, validation error 9.595238 %
epoch 86, minibatch 56/56, validation error 10.607143 %
epoch 87, minibatch 56/56, validation error 9.250000 %
epoch 88, minibatch 56/56, validation error 9.940476 %
epoch 89, minibatch 56/56, validation error 9.488095 %
*Multiclass Logistic Regression with CG*
import cPickle, time
import theano
import theano.tensor as T
import numpy as np
from sklearn.cross_validation import train_test_split
class LogisticRegression(object):
def __init__(self, input, n_in, n_out):
self.theta = theano.shared(value = np.zeros(n_in*n_out+n_out,
dtype = theano.config.floatX),
name = 'theta',
borrow = True)
self.W = self.theta[:n_in*n_out].reshape((n_in, n_out))
self.b = self.theta[n_in*n_out:]
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
def negative_log_likelihood(self, y):
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
def errors(self, y):
return T.mean(T.neq(self.y_pred, y))
def share_data(raw_data, dtype=theano.config.floatX):
shared_data = theano.shared(value = np.asarray(raw_data,
dtype=theano.config.floatX),
borrow = True)
return T.cast(shared_data, dtype=dtype)
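## NOTE: when dtype already matches, T.cast leaves the shared variable as-is
## (which is why .get_value() below still works); for the int32 labels it
## returns a symbolic TensorVariable that wraps the shared data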
def cg_optimize(v_train_X, v_train_y, v_validation_X, v_validation_y,
                batch_size = 600, n_epochs = 50):
ishape = (28, 28)
n_in = 28 * 28
n_out = 10
n_train_batches = v_train_X.get_value(borrow=True).shape[0] / batch_size
n_validation_batches = v_validation_X.get_value(borrow=True).shape[0] / batch_size
print 'building the model ...'
index = T.lscalar()
x = T.matrix()
y = T.ivector()
classifier = LogisticRegression(input = x, n_in = n_in, n_out = n_out)
cost = classifier.negative_log_likelihood(y)
validate_model = theano.function(inputs = [index],
outputs = classifier.errors(y),
givens = {
x: v_validation_X[index*batch_size:(index+1)*batch_size],
y: v_validation_y[index*batch_size:(index+1)*batch_size]
})
batch_cost = theano.function(inputs = [index],
outputs = cost,
givens = {
x: v_train_X[index*batch_size:(index+1)*batch_size],
y: v_train_y[index*batch_size:(index+1)*batch_size]
})
batch_grad = theano.function(inputs = [index],
outputs = T.grad(cost, classifier.theta),
givens = {
x: v_train_X[index*batch_size:(index+1)*batch_size],
y: v_train_y[index*batch_size:(index+1)*batch_size]
})
## helper function for scipy optimization
best_validation_score = np.inf
def train_fn(theta_value):
classifier.theta.set_value(theta_value, borrow = True)
train_loss = np.mean([batch_cost(i) for i in xrange(n_train_batches)])
return train_loss
def train_fn_grad(theta_value):
classifier.theta.set_value(theta_value, borrow = True)
#grad = batch_grad(0)
grad = sum([batch_grad(i) for i in xrange(n_train_batches)]) / n_train_batches
return grad
def callback(theta_value):
classifier.theta.set_value(theta_value, borrow = True)
## compute the validation loss
validation_loss = np.mean([validate_model(i) for i in xrange(n_validation_batches)])
#print 'validation error %f %%' % validation_loss * 100.
print 'validation error', validation_loss
#if validation_loss < best_validation_score:
# best_validation_score = validation_loss
import scipy.optimize
print 'Optimizing using scipy.optimize.fmin_cg...'
start_time = time.clock()
best_w_b = scipy.optimize.fmin_cg(
f = train_fn,
x0 = np.zeros((n_in+1)*n_out, dtype=x.dtype),
fprime = train_fn_grad,
callback = callback,
disp = 0,
        maxiter = n_epochs
)
end_time = time.clock()
print best_w_b
## load digits data
X, y = cPickle.load(open('data/digits.pkl', 'rb'))
print X.shape, y.shape
print np.unique(y)
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
v_train_y, v_validation_y = share_data(train_y, dtype='int32'), share_data(validation_y, dtype='int32')
cg_optimize(v_train_X, v_train_y, v_validation_X, v_validation_y)
(42000, 784) (42000,)
[0 1 2 3 4 5 6 7 8 9]
building the model ...
Optimizing using scipy.optimize.fmin_cg...
validation error 0.33130952381
validation error 0.212142857143
validation error 0.169880952381
validation error 0.166666666667
validation error 0.149047619048
validation error 0.137261904762
validation error 0.131666666667
validation error 0.122142857143
validation error 0.117619047619
validation error 0.116785714286
validation error 0.11630952381
validation error 0.113928571429
validation error 0.110714285714
validation error 0.104880952381
validation error 0.101666666667
validation error 0.0985714285714
validation error 0.0983333333333
validation error 0.0969047619048
validation error 0.0953571428571
validation error 0.095
validation error 0.0960714285714
validation error 0.0954761904762
validation error 0.0939285714286
validation error 0.0947619047619
validation error 0.0938095238095
validation error 0.0927380952381
validation error 0.092619047619
validation error 0.0905952380952
validation error 0.0890476190476
validation error 0.0908333333333
validation error 0.09
validation error 0.0891666666667
validation error 0.0890476190476
validation error 0.0888095238095
validation error 0.0879761904762
validation error 0.0886904761905
validation error 0.0883333333333
validation error 0.0890476190476
validation error 0.0895238095238
validation error 0.0884523809524
validation error 0.0869047619048
validation error 0.0875
validation error 0.0857142857143
validation error 0.0878571428571
validation error 0.0872619047619
validation error 0.084880952381
validation error 0.0855952380952
validation error 0.0865476190476
validation error 0.0866666666667
validation error 0.085
[  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,   1.60823719e-05  -4.80044308e-05  -7.06488857e-06]
*MLP Classification*
import theano
import theano.tensor as T
import time
import numpy as np
class LogisticRegression(object):
## binding with input
def __init__(self, input, n_in, n_out):
self.W = theano.shared(value = np.zeros((n_in, n_out),
dtype = theano.config.floatX),
name = 'W', borrow = True)
self.b = theano.shared(value = np.zeros((n_out, ),
dtype = theano.config.floatX),
name = 'b', borrow = True)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
self.params = [self.W, self.b]
## binding with y
def negative_log_likelihood(self, y):
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
def errors(self, y):
return T.mean(T.neq(self.y_pred, y))
class HiddenLayer(object):
def __init__(self, rng, input, n_in, n_out, W = None, b = None, activation=T.tanh):
self.input = input
if W is None:
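            ## Glorot-style initialization: uniform in +/- sqrt(6 / (fan_in + fan_out))
            ## keeps activation variance roughly constant across layers
            ## (scaled by 4 below for sigmoid, following the deep learning tutorial heuristic)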
W_value = np.asarray(rng.uniform(
low = -np.sqrt(6. / (n_in + n_out)),
high = np.sqrt(6. / (n_in + n_out)),
size = (n_in, n_out)),
dtype = theano.config.floatX)
if activation == T.nnet.sigmoid:
W_value *= 4
W = theano.shared(value = W_value, name = 'W', borrow = True)
if b is None:
b_value = np.zeros((n_out, ), dtype = theano.config.floatX)
b = theano.shared(value = b_value, name = 'b', borrow = True)
self.W = W
self.b = b
lin_output = T.dot(input, self.W) + self.b
self.output = lin_output if activation is None else activation(lin_output)
self.params = [self.W, self.b]
class MLP(object):
def __init__(self, rng, input, n_in, n_hidden, n_out):
self.hiddenlayer = HiddenLayer(rng = rng, input = input, n_in = n_in,
n_out = n_hidden, activation = T.tanh)
        self.logRegressionLayer = LogisticRegression(input = self.hiddenlayer.output,
                                                     n_in = n_hidden, n_out = n_out)
        ## L1 norm
        self.L1 = abs(self.hiddenlayer.W).sum() + abs(self.logRegressionLayer.W).sum()
        ## L2 norm
        self.L2_sqr = (self.hiddenlayer.W ** 2).sum() + (self.logRegressionLayer.W ** 2).sum()
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors
        self.params = self.hiddenlayer.params + self.logRegressionLayer.params
def share_data(data, dtype = theano.config.floatX):
shared_data = theano.shared(np.asarray(data, dtype=theano.config.floatX),
borrow = True)
return T.cast(shared_data, dtype = dtype)
def run_mlp(v_train_X, v_train_y, v_validation_X, v_validation_y,
learning_rate = 0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs = 1000,
batch_size = 20, n_hidden = 500):
n_train_batches = v_train_X.get_value(borrow = True).shape[0] / batch_size
n_validation_batches = v_validation_X.get_value(borrow = True).shape[0] / batch_size
print '... building the model'
    ## symbolic variables
index = T.lscalar()
x = T.matrix('x')
y = T.ivector('y')
rng = np.random.RandomState(0)
classifier = MLP(rng = rng, input = x, n_in = 28 * 28, n_hidden = n_hidden, n_out=10)
cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
validate_model = theano.function(inputs = [index],
outputs = classifier.errors(y),
givens = {
x: v_validation_X[index*batch_size:(index+1)*batch_size],
y: v_validation_y[index*batch_size:(index+1)*batch_size]
})
gparams = T.grad(cost, classifier.params)
updates = [(param, param-learning_rate*gparam) for (param, gparam) in zip(classifier.params, gparams)]
train_model = theano.function(inputs = [index],
outputs = cost,
updates = updates,
givens = {
x: v_train_X[index*batch_size:(index+1)*batch_size],
y: v_train_y[index*batch_size:(index+1)*batch_size]
})
print '... training'
patience = 10000
patience_increase = 2
improvement_threshold = 0.995
validation_frequency = min(n_train_batches, patience / 2)
best_params = None
best_validation_loss = np.inf
start_time = time.clock()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
            ## if it is time, check the validation
if (iter+1) % validation_frequency == 0:
validation_loss = np.mean([validate_model(i) for i in xrange(n_validation_batches)])
print 'epoch %i, minibatch %i / %i, validation error %f %%' % (epoch, minibatch_index+1,
n_train_batches, validation_loss * 100.)
if validation_loss < best_validation_loss:
if validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, iter * patience_increase)
best_validation_loss = validation_loss
best_params = classifier.params
if patience <= iter:
done_looping = True
break
end_time = time.clock()
print 'Optimization Complete. Best Validation score of %f %%' % (best_validation_loss * 100.)
## load digits data
import cPickle
from sklearn.cross_validation import train_test_split
X, y = cPickle.load(open('data/digits.pkl', 'rb'))
print X.shape, y.shape
print np.unique(y)
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
v_train_y, v_validation_y = share_data(train_y, dtype='int32'), share_data(validation_y, dtype='int32')
(42000, 784) (42000,)
[0 1 2 3 4 5 6 7 8 9]
... building the model
... training
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-0da74d5f7f99> in <module>()
      8 v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
      9 v_train_y, v_validation_y = share_data(train_y, dtype='int32'), share_data(validation_y, dtype='int32')
---> 10 run_mlp(v_train_X, v_train_y, v_validation_X, v_validation_y)

<ipython-input-9-c99b1f43f020> in run_mlp(v_train_X, v_train_y, v_validation_X, v_validation_y, learning_rate, L1_reg, L2_reg, n_epochs, batch_size, n_hidden)
    112                 validation_loss = np.mean([validate_model(i) for i in xrange(n_validation_batches)])
    113                 print 'epoch %i, minibatch %i / %i, validation error %f %%' % (epoch, minibatch_index+1,
--> 114                     n_train_batches, valiation_loss * 100.)
    115                 if validation_loss < best_validation_loss:
    116                     if validation_loss < best_validation_loss * improvement_threshold:

NameError: global name 'valiation_loss' is not defined
run_mlp(v_train_X, v_train_y, v_validation_X, v_validation_y)
... building the model
... training
epoch 1, minibatch 1680 / 1680, validation error 10.880952 %
epoch 2, minibatch 1680 / 1680, validation error 10.607143 %
epoch 3, minibatch 1680 / 1680, validation error 10.357143 %
epoch 4, minibatch 1680 / 1680, validation error 9.845238 %
epoch 5, minibatch 1680 / 1680, validation error 10.154762 %
epoch 6, minibatch 1680 / 1680, validation error 9.154762 %
epoch 7, minibatch 1680 / 1680, validation error 9.892857 %
epoch 8, minibatch 1680 / 1680, validation error 9.797619 %
epoch 9, minibatch 1680 / 1680, validation error 9.738095 %
epoch 10, minibatch 1680 / 1680, validation error 9.964286 %
epoch 11, minibatch 1680 / 1680, validation error 9.964286 %
Optimization Complete. Best Validation score of 9.154762 %
*Convolutional Neural Network (LeNet5)*
import theano
import theano.tensor as T
import time
import numpy as np
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
class LeNetConvPoolLayer(object):
"""
Pool Layer of a convolutional network
"""
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
"""
Allocate a LeNetConvPoolLayer with Shared variable internal params
rng = np.random.RandomState
input = theano.tensor.dtensor4, symbolic image tensor
        filter_shape = tuple or list of length 4,
(n_filters, n_input_feats_maps, filter_height, filter_width)
image_shape = (batch_size, n_input_feats_maps, img_height, img_width)
poolsize = the downsampling (pooling) factor (n_rows, n_cols)
"""
assert image_shape[1] == filter_shape[1]
self.input = input
        ## there are n_input_feats_maps * filter_height * filter_width
        ## inputs to each hidden unit
fan_in = np.prod(filter_shape[1:])
## each unit in the lower layer receives a gradient from:
## n_output_feats_maps * filter_height * filter_width / pooling_size
fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / np.prod(poolsize))
## initialize weights
W_bound = np.sqrt(6. / (fan_in + fan_out))
self.W = theano.shared(np.asarray(rng.uniform(
low = -W_bound,
high = W_bound,
size = filter_shape),
dtype=theano.config.floatX),
borrow = True)
b_values = np.zeros((filter_shape[0], ), dtype = theano.config.floatX)
self.b = theano.shared(value = b_values, borrow = True)
## convolve input feature maps with filters
conv_out = conv.conv2d(input = input, filters = self.W,
filter_shape = filter_shape, image_shape = image_shape)
## downsample each feature map individually, using maxpooling
pooled_out = downsample.max_pool_2d(input = conv_out,
ds = poolsize, ignore_border = True)
## add the bias term, but first recast into a tensor shape of
## (1, n_filters, 1, 1)
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
self.params = [self.W, self.b]
class LogisticRegression(object):
## binding with input
def __init__(self, input, n_in, n_out):
self.W = theano.shared(value = np.zeros((n_in, n_out),
dtype = theano.config.floatX),
name = 'W', borrow = True)
self.b = theano.shared(value = np.zeros((n_out, ),
dtype = theano.config.floatX),
name = 'b', borrow = True)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis = 1)
self.params = [self.W, self.b]
## binding with y
def negative_log_likelihood(self, y):
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
def errors(self, y):
return T.mean(T.neq(self.y_pred, y))
class HiddenLayer(object):
def __init__(self, rng, input, n_in, n_out, W = None, b = None, activation=T.tanh):
self.input = input
if W is None:
W_value = np.asarray(rng.uniform(
low = -np.sqrt(6. / (n_in + n_out)),
high = np.sqrt(6. / (n_in + n_out)),
size = (n_in, n_out)),
dtype = theano.config.floatX)
if activation == T.nnet.sigmoid:
W_value *= 4
W = theano.shared(value = W_value, name = 'W', borrow = True)
if b is None:
b_value = np.zeros((n_out, ), dtype = theano.config.floatX)
b = theano.shared(value = b_value, name = 'b', borrow = True)
self.W = W
self.b = b
lin_output = T.dot(input, self.W) + self.b
self.output = lin_output if activation is None else activation(lin_output)
self.params = [self.W, self.b]
def share_data(data, dtype = theano.config.floatX):
shared_data = theano.shared(np.asarray(data, dtype=theano.config.floatX),
borrow = True)
return T.cast(shared_data, dtype = dtype)
def run_lenet5(v_train_X, v_train_y, v_validation_X, v_validation_y,
learning_rate = 0.01, n_epochs = 200,
batch_size = 500, n_kerns = [20, 50]):
"""
n_kerns = number of kernels on each layer
"""
n_train_batches = v_train_X.get_value(borrow = True).shape[0] / batch_size
n_validation_batches = v_validation_X.get_value(borrow = True).shape[0] / batch_size
print '... building the model'
    ## symbolic variables
index = T.lscalar()
x = T.matrix('x')
y = T.ivector('y')
ishape = (28, 28)
rng = np.random.RandomState(0)
    ## reshape matrix of rasterized images of shape (batch_size, 28*28)
    ## to a 4D tensor, compatible with LeNetConvPoolLayer
layer0_input = x.reshape((batch_size, 1, 28, 28))
## construct the first convolutional pooling layer
## filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
## maxpooling reduces this further to (24/2, 24/2) = (12, 12)
## 4D output tensor is thus of shape (batch_size, n_kerns[0], 12, 12)
layer0 = LeNetConvPoolLayer(rng, input = layer0_input,
image_shape = (batch_size, 1, 28, 28),
filter_shape = (n_kerns[0], 1, 5, 5),
poolsize = (2, 2))
## construct the second convolutional pooling layer
## filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
## maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    ## 4D output tensor is thus of shape (batch_size, n_kerns[1], 4, 4)
layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
image_shape = (batch_size, n_kerns[0], 12, 12),
filter_shape = (n_kerns[1], n_kerns[0], 5, 5),
poolsize = (2, 2))
layer2_input = layer1.output.flatten(2)
    ## a fully-connected hidden layer (tanh activation)
layer2 = HiddenLayer(rng, input=layer2_input, n_in = n_kerns[1]*4*4,
n_out = 500, activation=T.tanh)
    ## classify the values of the fully-connected layer
layer3 = LogisticRegression(input = layer2.output, n_in = 500, n_out = 10)
cost = layer3.negative_log_likelihood(y)
validate_model = theano.function([index], layer3.errors(y),
givens = {
x: v_validation_X[index*batch_size:(index+1)*batch_size],
y: v_validation_y[index*batch_size:(index+1)*batch_size]
})
params = layer3.params + layer2.params + layer1.params + layer0.params
grads = T.grad(cost, params)
updates = [(p, p-learning_rate*gp) for (p, gp) in zip(params, grads)]
train_model = theano.function([index], cost, updates = updates,
givens = {
x: v_train_X[index*batch_size:(index+1)*batch_size],
y: v_train_y[index*batch_size:(index+1)*batch_size]
})
print '... training'
patience = 10000
patience_increase = 2
improvement_threshold = 0.995
validation_frequency = min(n_train_batches, patience / 2)
best_params = None
best_validation_loss = np.inf
start_time = time.clock()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
            ## if it is time, check the validation
if (iter+1) % validation_frequency == 0:
validation_loss = np.mean([validate_model(i) for i in xrange(n_validation_batches)])
print 'epoch %i, minibatch %i / %i, validation error %f %%' % (epoch, minibatch_index+1,
n_train_batches, validation_loss * 100.)
if validation_loss < best_validation_loss:
if validation_loss < best_validation_loss * improvement_threshold:
patience = max(patience, iter * patience_increase)
best_validation_loss = validation_loss
best_params = params
if patience <= iter:
done_looping = True
break
end_time = time.clock()
print 'Optimization Complete. Best Validation score of %f %%' % (best_validation_loss * 100.)
## load digits data
import cPickle
from sklearn.cross_validation import train_test_split
X, y = cPickle.load(open('data/digits.pkl', 'rb'))
print X.shape, y.shape
print np.unique(y)
train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)
v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)
v_train_y, v_validation_y = share_data(train_y, dtype='int32'), share_data(validation_y, dtype='int32')
(42000, 784) (42000,)
[0 1 2 3 4 5 6 7 8 9]
run_lenet5(v_train_X, v_train_y, v_validation_X, v_validation_y)
... building the model
... training
epoch 1, minibatch 67 / 67, validation error 21.625000 %
epoch 2, minibatch 67 / 67, validation error 16.612500 %
epoch 3, minibatch 67 / 67, validation error 13.462500 %
epoch 4, minibatch 67 / 67, validation error 11.537500 %
epoch 5, minibatch 67 / 67, validation error 10.162500 %
epoch 6, minibatch 67 / 67, validation error 9.250000 %
epoch 7, minibatch 67 / 67, validation error 8.512500 %
epoch 8, minibatch 67 / 67, validation error 7.900000 %
epoch 9, minibatch 67 / 67, validation error 7.450000 %
epoch 10, minibatch 67 / 67, validation error 6.862500 %
epoch 11, minibatch 67 / 67, validation error 6.525000 %
epoch 12, minibatch 67 / 67, validation error 6.112500 %
epoch 13, minibatch 67 / 67, validation error 5.937500 %
epoch 14, minibatch 67 / 67, validation error 5.637500 %
epoch 15, minibatch 67 / 67, validation error 5.437500 %
epoch 16, minibatch 67 / 67, validation error 5.325000 %
epoch 17, minibatch 67 / 67, validation error 5.212500 %
epoch 18, minibatch 67 / 67, validation error 5.137500 %
epoch 19, minibatch 67 / 67, validation error 4.875000 %
epoch 20, minibatch 67 / 67, validation error 4.700000 %
epoch 21, minibatch 67 / 67, validation error 4.537500 %
epoch 22, minibatch 67 / 67, validation error 4.312500 %
epoch 23, minibatch 67 / 67, validation error 4.287500 %
epoch 24, minibatch 67 / 67, validation error 4.200000 %
epoch 25, minibatch 67 / 67, validation error 4.150000 %
epoch 26, minibatch 67 / 67, validation error 4.087500 %
epoch 27, minibatch 67 / 67, validation error 3.937500 %
epoch 28, minibatch 67 / 67, validation error 3.887500 %
epoch 29, minibatch 67 / 67, validation error 3.825000 %
epoch 30, minibatch 67 / 67, validation error 3.750000 %
epoch 31, minibatch 67 / 67, validation error 3.725000 %
epoch 32, minibatch 67 / 67, validation error 3.687500 %
epoch 33, minibatch 67 / 67, validation error 3.525000 %
epoch 34, minibatch 67 / 67, validation error 3.512500 %
epoch 35, minibatch 67 / 67, validation error 3.487500 %
epoch 36, minibatch 67 / 67, validation error 3.437500 %
epoch 37, minibatch 67 / 67, validation error 3.362500 %
epoch 38, minibatch 67 / 67, validation error 3.325000 %
epoch 39, minibatch 67 / 67, validation error 3.325000 %
epoch 40, minibatch 67 / 67, validation error 3.200000 %
epoch 41, minibatch 67 / 67, validation error 3.112500 %
epoch 42, minibatch 67 / 67, validation error 3.087500 %
epoch 43, minibatch 67 / 67, validation error 3.187500 %
epoch 44, minibatch 67 / 67, validation error 3.037500 %
epoch 45, minibatch 67 / 67, validation error 3.037500 %
epoch 46, minibatch 67 / 67, validation error 3.000000 %
epoch 47, minibatch 67 / 67, validation error 3.087500 %
epoch 48, minibatch 67 / 67, validation error 2.950000 %
epoch 49, minibatch 67 / 67, validation error 2.950000 %
epoch 50, minibatch 67 / 67, validation error 2.925000 %
epoch 51, minibatch 67 / 67, validation error 2.900000 %
epoch 52, minibatch 67 / 67, validation error 2.950000 %
epoch 53, minibatch 67 / 67, validation error 2.887500 %
epoch 54, minibatch 67 / 67, validation error 2.825000 %
epoch 55, minibatch 67 / 67, validation error 2.850000 %
epoch 56, minibatch 67 / 67, validation error 2.812500 %
epoch 57, minibatch 67 / 67, validation error 2.787500 %
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-20-46d6e5ce392b> in <module>()
----> 1 run_lenet5(v_train_X, v_train_y, v_validation_X, v_validation_y)

<ipython-input-19-b3e17b853362> in run_lenet5(v_train_X, v_train_y, v_validation_X, v_validation_y, learning_rate, n_epochs, batch_size, n_kerns)
    165         epoch = epoch + 1
    166         for minibatch_index in xrange(n_train_batches):
--> 167             minibatch_avg_cost = train_model(minibatch_index)
    168             iter = (epoch - 1) * n_train_batches + minibatch_index
    169             ## if it is time, check the validation

/Library/Python/2.7/site-packages/Theano-0.6.0rc3-py2.7.egg/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    578         t0_fn = time.time()
    579         try:
--> 580             outputs = self.fn()
    581         except Exception:
    582             if hasattr(self.fn, 'position_of_error'):

KeyboardInterrupt: