import scipy as sp
import numpy as np
X = np.asarray([
[1, 2],
[3.5, 4],
[5, 6]
])
import theano
import theano.tensor as T
from theano.tensor import nnet
## Sigmoid
def sigmoid(X):
"""
numpy.array -> numpy.array
compute sigmoid function: 1 / (1 + exp(-X))
All elements should be in [0, 1]
"""
return 1. / (1. + np.exp(-X))
def sigmoid_grad(X):
"""
It implements element-wise gradient, which takes each element
as a scalar
"""
sig = sigmoid(X)
return sig * (1 - sig)
## theano version - theano.grad requires its cost argument to be a scalar,
## so the gradient of a matrix-valued sigmoid cannot be taken directly:
#x = T.matrix()
#tsigmoid = theano.function(inputs = [x], outputs = nnet.sigmoid(x))
#tsigmoid_grad = theano.function(inputs = [x], outputs = theano.grad(nnet.sigmoid(x), x))  # fails: cost must be a scalar
print sigmoid(X)
print
print T.nnet.sigmoid(X).eval()
print
print sigmoid_grad(X)
[[ 0.73105858  0.88079708]
 [ 0.97068777  0.98201379]
 [ 0.99330715  0.99752738]]

[[ 0.73105858  0.88079708]
 [ 0.97068777  0.98201379]
 [ 0.99330715  0.99752738]]

[[ 0.19661193  0.10499359]
 [ 0.02845302  0.01766271]
 [ 0.00664806  0.00246651]]
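## Aside (sketch, not in the original): because theano.grad needs a scalar
## cost, a common workaround for an element-wise op like the sigmoid is to
## take the grad of the summed output, which yields the same element-wise
## derivative. The variable and function names below are assumptions.
xs = T.matrix('xs')
elementwise_sigmoid_grad = theano.function(inputs = [xs],
                                           outputs = theano.grad(T.sum(nnet.sigmoid(xs)), xs))
# print elementwise_sigmoid_grad(X)   # should match sigmoid_grad(X)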
## softmax
def softmax(X):
"""
numpy.array -> numpy.array
Compute the softmax function: exp(X) / sum(exp(X), axis=1),
where each row of X is a vector of outputs (e.g., different columns
representing scores for different classes).
The output is a matrix in which each row sums to 1.0, since the entries
are probabilities.
"""
mx = np.max(X, axis = 1, keepdims = True)
ex = np.exp(X - mx) # subtract the per-row max: prefer harmless underflow (zeros) over overflow in exp
return ex / np.sum(ex, 1).reshape(-1, 1)
def softmax_grad(X):
## element-wise (diagonal) gradient of softmax, s * (1 - s); the cross terms
## of the full Jacobian are ignored here (see the sketch after the output below)
sm = softmax(X)
return sm * (1 - sm)
print softmax(X)
x = T.matrix('x')
f = theano.function(inputs = [x], outputs = T.nnet.softmax(x))
print
print f(X)
print
print softmax_grad(X)
[[ 0.26894142  0.73105858]
 [ 0.37754067  0.62245933]
 [ 0.26894142  0.73105858]]

[[ 0.26894142  0.73105858]
 [ 0.37754067  0.62245933]
 [ 0.26894142  0.73105858]]

[[ 0.19661193  0.19661193]
 [ 0.23500371  0.23500371]
 [ 0.19661193  0.19661193]]
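## Note (sketch, not in the original): softmax_grad above keeps only the
## diagonal of the Jacobian. For one row s = softmax(x), the full Jacobian is
## J[i, j] = s[i] * (1{i == j} - s[j]); its diagonal reduces to s * (1 - s).
## The helper name below is an assumption for illustration.
def softmax_jacobian_row(x_row):
    """numpy.array (n,) -> numpy.array (n, n): full softmax Jacobian for one row"""
    s = softmax(x_row.reshape(1, -1)).ravel()
    return np.diag(s) - np.outer(s, s)
# np.allclose(np.diag(softmax_jacobian_row(X[0])), softmax_grad(X)[0])  # -> True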
## tanh and derivatives
def tanh(X):
return np.tanh(X)
def tanh_grad(X):
return 1. / np.square(np.cosh(X))
print tanh(X)
print
print tanh_grad(X)
[[ 0.76159416  0.96402758]
 [ 0.9981779   0.9993293 ]
 [ 0.9999092   0.99998771]]

[[  4.19974342e-01   7.06508249e-02]
 [  3.64088472e-03   1.34095068e-03]
 [  1.81583231e-04   2.45765474e-05]]
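## Aside (sketch, not in the original): 1 / cosh(x)^2 equals 1 - tanh(x)^2, so
## the tanh gradient can also be written from the activation itself, which
## avoids recomputing cosh (and its overflow for large |x|):
def tanh_grad_from_output(X):
    t = np.tanh(X)
    return 1. - np.square(t)
# np.allclose(tanh_grad(X), tanh_grad_from_output(X))  # -> True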
## linear and derivatives
def linear(X):
return X
def linear_grad(X):
return np.ones(X.shape)
## rectified linear (ReLU) and derivatives
def rectified_linear(X):
return X * (X > 0)
def rectified_linear_grad(X):
return (X > 0).astype('b') # boolean mask cast to int8: 0 where X <= 0, 1 elsewhere
print linear(X), '\n'
print linear_grad(X), '\n'
print rectified_linear(X), '\n'
print rectified_linear_grad(X), '\n'
[[ 1.   2. ]
 [ 3.5  4. ]
 [ 5.   6. ]]

[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]

[[ 1.   2. ]
 [ 3.5  4. ]
 [ 5.   6. ]]

[[1 1]
 [1 1]
 [1 1]]
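## Sanity check (sketch, not in the original): compare each analytic
## element-wise gradient against a central finite difference. The helper name
## and the step size are assumptions, not part of the original code.
def check_elementwise_grad(fn, grad_fn, X, eps = 1e-6):
    numeric = (fn(X + eps) - fn(X - eps)) / (2. * eps)
    return np.max(np.abs(numeric - grad_fn(X)))
# print check_elementwise_grad(sigmoid, sigmoid_grad, X)
# print check_elementwise_grad(tanh, tanh_grad, X)
# print check_elementwise_grad(rectified_linear, rectified_linear_grad, X)
## (softmax is not element-wise, so it is excluded from this check)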
import scipy as sp
import numpy as np
class Model(object):
"""
Base class for learning models. All model types inheriting this object must include:
- attrs_ class variable: for pretty printing
- cost function: that takes the model and data (and possibly labels) and computes the cost
of the data given the model. Must return a numeric cost and a gradient.
- train function: that takes the model and data (and possibly labels) and transforms the data
into a suitable format to pass into an optimization algorithm
- update function: that takes the model and gradient to update the model parameters
"""
attrs_ = []
def __repr__(self):
meat = ', '.join(['%s = %s' % (attr, str(getattr(self, attr))) for attr in self.attrs_])
return self.__class__.__name__ + '(' + meat + ')'
def cost(self, data, target):
raise NotImplementedError('To be implemented')
def train(self, data, target):
raise NotImplementedError('To be implemented')
def update(self, grad):
raise NotImplementedError('To be implemented')
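## Illustration (sketch, not in the original): a minimal subclass showing how
## the Model interface and the attrs_-based __repr__ are meant to fit together.
## The class name and its least-squares cost are assumptions for demonstration.
class ToyLinearModel(Model):
    attrs_ = ['ndim']
    def __init__(self, ndim):
        self.ndim = ndim
        self.w = np.zeros(ndim)
    def cost(self, data, target):
        # mean squared error and its gradient w.r.t. w
        err = data.dot(self.w) - target
        return np.mean(err ** 2), 2. * data.T.dot(err) / len(target)
    def train(self, data, target):
        # nothing to reshape for this toy model
        return np.asarray(data), np.asarray(target)
    def update(self, grad, lr = 0.1):
        self.w -= lr * grad
# print ToyLinearModel(3)   # -> ToyLinearModel(ndim = 3)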
from numpy.random import uniform
import numpy as np
from functools import partial
def initialize_weights(nin, nout):
"""
weight shape - nout * nin
data X shape - nin * nsamples (each column is one example)
"""
return (uniform(-1, 1, size = (nout, nin))) / np.sqrt(nin + nout)
def compute_numerical_gradient(model, data, target = None, err = 1e-8):
raise NotImplementedError('TO BE IMPLEMENTED')
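## Possible implementation (sketch, not in the original): central finite
## differences over the entries of a model's weight matrix. It assumes the
## model exposes a W attribute and a cost() returning (cost, grad), which is
## a convention of this notebook rather than part of the base class.
def numerical_gradient_W(model, data, target = None, err = 1e-8):
    W = model.W
    num_grad = np.zeros_like(W)
    it = np.nditer(W, flags = ['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = W[idx]
        W[idx] = orig + err          # perturb one weight up
        cost_plus, _ = model.cost(data, target)
        W[idx] = orig - err          # perturb it down
        cost_minus, _ = model.cost(data, target)
        W[idx] = orig                # restore
        num_grad[idx] = (cost_plus - cost_minus) / (2. * err)
        it.iternext()
    return num_grad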
## TESTING
print initialize_weights(3, 10)
[[ 0.05954632 -0.21633934 -0.16199111]
 [-0.07968537 -0.00524121 -0.07855087]
 [ 0.10327397  0.03170066  0.02869144]
 [-0.00843493 -0.01846055  0.12962895]
 [ 0.19265302 -0.08632744  0.19428462]
 [ 0.04206315 -0.00778248 -0.1927412 ]
 [ 0.2437375   0.25630775 -0.19549379]
 [-0.14286722 -0.02028118  0.10749867]
 [-0.10868784  0.02095068  0.2106905 ]
 [ 0.01285801 -0.23510364  0.23589165]]
from scipy import sparse
from numpy.random import uniform
MODELFNS = {
'sigmoid': sigmoid
, 'softmax': softmax
, 'tanh': tanh
, 'linear': linear
, 'ReLU': rectified_linear
}
GRADFNS = {
'sigmoid': sigmoid_grad
, 'softmax': softmax_grad
, 'tanh': tanh_grad
, 'linear': linear_grad
, 'ReLU': rectified_linear_grad
}
class MLPLayer(Model):
attrs_ = ['nin', 'nout', 'modelfn', 'dropout']
def __init__(self, nin, nout, modelfn = 'sigmoid', dropout = 0.0):
self._W = initialize_weights(nin + 1, nout).astype(np.float)
self._nin = nin
self._nout = nout
self._modelfn = modelfn
self.dropout = dropout
@property
def nin(self):
return self._nin
@property
def nout(self):
return self._nout
@property
def modelfn(self):
return self._modelfn
@property
def W(self):
return self._W
@W.setter
def W(self, value):
self._W = value
@property
def l2_penalty(self):
## excluding offset b parameter, which is the first COLUMN
return np.sum(np.square(self._W[:, 1:]))  ## sketch completion: sum of squared weights, bias column excluded
def propup(self, X, ispred = False):
## sketch completion: X is nin x nsamples; prepend a row of ones for the bias
## term to match the (nin + 1)-column weight matrix, then apply the layer's
## nonlinearity (dropout is not handled in this sketch)
Xb = np.vstack([np.ones((1, X.shape[1])), X])
return MODELFNS[self._modelfn](np.dot(self._W, Xb))
def backprop(self, delta):  ## hypothetical signature - the original leaves the arguments unspecified
pass
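## Quick check (sketch, not in the original, relying on the sketch propup
## above): build a small layer and propagate the 2-feature data through it.
## X is nsamples x nfeatures in this notebook, so it is transposed to the
## nin x nsamples layout that propup expects.
layer = MLPLayer(nin = 2, nout = 3, modelfn = 'sigmoid')
print layer                      # attrs_-based __repr__ from Model
print layer.propup(X.T).shape    # (nout, nsamples) = (3, 3)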