-- assignment and printing
a, b = 24, "tacos" -- can assign tuple-style, like in python (N.B. these are globals!)
c = 'please' -- can enclose string literals in single or double quotes
print(a, b, c, "\n")
-- string concatenation uses the '..' operator (not '+')
d = b .. ', ' .. c
print(d)
-- arithmetic syntax similar to MATLAB ('^' is exponentiation, '%' is modulo)
print(2*a, a^2, a%2, "\n")
-- note that all numbers are implicitly floats/doubles!
print(a/7, "\n")
-- if you want to ensure you get something integral, use math.ceil or math.floor
print(math.floor(a/7), math.ceil(a/7), "\n")
-- some other useful math functions (both take any number of arguments)
print(math.min(1, 22, 44), math.max(1, 22, 44), "\n")
-- while loops are enclosed in while-do-end blocks; this one prints 1, 2
i = 1
while i < 3 do
print(i)
i = i + 1 -- N.B. no 'i += 1' or 'i++' syntax in Lua
end
-- numeric for-loops iterate over a range of numbers, INCLUSIVE of both endpoints!
for i = 3, 5 do
print(i)
end
-- like in python, you can specify the step size with a 3rd loop argument
-- (this prints 10, 6, 2: the loop stops once i would pass the limit of 1)
for i = 10, 1, -4 do
print(i)
end
-- conditional statements go in if-then-elseif-else-end blocks;
-- branches are tested top-down and only the first true branch runs
val = 24
if val == 0 then
print("zero!")
elseif val%2 == 0 then -- this branch fires for val = 24
print("even and nonzero!")
elseif val ~= 13 then -- N.B. Lua uses '~=' to mean '!='; also works for strings!
print("odd and not 13!")
else
print("everything else!")
end
-- lua allows the 'break' keyword; this loop prints only 1 (breaks when i hits 2)
for i = 1, 3 do
if i % 2 == 0 then
break
end
print(i)
end
-- but it doesn't have 'continue';
-- see http://lua-users.org/wiki/ContinueProposal and
-- http://stackoverflow.com/questions/3524970/why-does-lua-have-no-continue-statement for some workarounds
-- nil and false evaluate to false
a, b = nil, false
-- everything else evaluates to true -- including 0 and "" (unlike python/C)!
c, d = "taco", 0
if a or b then
print("first!")
elseif c and d then -- both "taco" and 0 are truthy, so this prints "second!"
print("second!")
else
print("third!")
end
-- 'and' and 'or' have interesting side effects; allow for 'ternary if' as follows:
-- (caveat: 'x and A or B' misbehaves if A itself is nil or false)
val2 = a and 1 or 2 -- a is falsey, so we get 2
print(val2, "\n")
val3 = c and 3 or 4 -- c is truthy, so we get 3
print(val3, "\n")
-- local vs global variables
var = 22 -- global
function f1()
-- this 'var' shadows the global inside f1 only, so f1() returns 34
local var = 33 -- N.B. local variables generally lead to faster code!
return var + 1
end
print(f1(), "\n")
function f2()
-- no local declaration, so this reads the global var (22); f2() returns 23
return var + 1
end
print(f2(), "\n")
-- default and extra arguments
-- encodeDigits packs up to three digits (0-9) into one number:
-- 'a' is the ones place, 'b' the tens place, 'c' the hundreds place.
function encodeDigits(a, b, c)
-- Lua has no default-parameter syntax; 'x or default' is the standard idiom
-- (safe here because every valid digit, including 0, is truthy in Lua)
local ones = a or 0
local tens = b or 0
local hundreds = c or 0
-- each digit must lie in [0, 10)
for _, digit in ipairs({ones, tens, hundreds}) do
assert(digit >= 0 and digit < 10)
end
return ones + 10*tens + 100*hundreds
end
print(encodeDigits(1, 2, 3),"\n") -- no defaults used
print(encodeDigits(2),"\n") -- defaults for b and c used
print(encodeDigits(nil, 2),"\n") -- defaults for a and c used
print(encodeDigits(), "\n") -- all defaults used
print(encodeDigits(1, 2, 3, 4),"\n") -- extra 4th argument is silently ignored
-- returning multiple values
-- divWithRemainder returns the floored quotient and the remainder of a/b.
function divWithRemainder(a, b)
local quotient = math.floor(a / b)
-- Lua defines a % b as a - math.floor(a/b)*b, so this equals a % b exactly
return quotient, a - quotient * b
end
d, r = divWithRemainder(10, 3)
print(d, r, "\n")
-- if you attempt to place multiple values in a single variable, lua just forgets the values after the first
d = divWithRemainder(10, 3)
print(d) -- N.B. you don't get a tuple like in python; just 3
-- (function stuff outside the scope of this tutorial: functions are first class objects, closures)
-- tables can be used as hash-based associative arrays (like python dictionaries);
-- keys of different types (strings, numbers, ...) can coexist in one table
t1 = {} -- construct an empty table
t1["one"] = 1
t1["two"] = 2
t1[3] = "three"
print(t1, "\n") -- N.B. printing a table shows its address, not its contents
t2 = {["one"]=1, ["two"]=2, [3]="three"} -- constructing a table literal
print(t2, "\n")
-- can access string attributes either with brackets, or with dot notation
print(t2["one"], t2[3], "\n")
print(t2.one) -- t2.one is sugar for t2["one"]
-- iterating over key, value pairs (N.B. pairs() visits keys in unspecified order)
for k,v in pairs(t1) do
print(k,v)
end
-- remove elements from dictionaries by setting to nil
t1["one"] = nil
print(t1,"\n")
-- when a table uses only integer keys 1..n, it can also function as an array!
-- N.B. Tables (and tensors) are 1-indexed!!!
arr = {} -- construct an empty array
arr[1] = "one"
arr[2] = "two"
arr[3] = "three"
print(arr,"\n")
arr2 = {"one", "two", "three"} -- construct an array literal
print(arr2)
-- can get the length of an array by prepending with '#'
print(#arr,"\n")
--N.B. '#' only works with array-style tables (and not with dictionary-style tables)
-- If you want to get the size of a dictionary in constant time, you need to store it somewhere; gross!
ugh = {["one"]=1, ["two"]=2}
print(#ugh,"\n") -- misleading! prints 0, because 'ugh' has no integer keys 1..n
-- instead of using integer keys to index, can also append to a table as follows
arr3 = {}
table.insert(arr3, "one") -- equivalent to t[#t+1] = "one"
table.insert(arr3, "two")
print(arr3)
-- can iterate over an array in order as follows
for i, el in ipairs(arr2) do -- ipairs() is like enumerate() in python
print(i, el)
end
-- to remove elements from array, use table.remove (which is inefficient:
-- it shifts every later element down by one so no hole is left behind)
table.remove(arr2,2)
print(arr2,"\n")
--[[ Tensors are multi-dimensional generalizations of arrays/matrices, and are the primary data-structure provided
by Torch (just as arrays are the primary data-structure provided by Numpy). Tensors are great, and anytime
you can use them you probably should.
Also check out https://github.com/torch/torch7/blob/master/doc/tensor.md for documentation on Tensor objects,
and https://github.com/torch/torch7/blob/master/doc/maths.md for documentation on mathematical operations
defined on Tensors
--]]
-- here are some ways of constructing Tensors (of different sizes and dimensions)
A = torch.Tensor(3, 3) -- an empty 3x3 Tensor (initialized with garbage; memory is NOT zeroed)
B = torch.zeros(3, 3, 2) -- 3x3x2 Tensor initialized with zeros
C = torch.ones(3, 1, 3) -- 3x1x3 Tensor initialized with ones
D = torch.randn(2) -- 2-vector (still a Tensor) initialized with standard gaussian noise
E = torch.rand(1, 1, 1, 1) -- 1x1x1x1 Tensor initialized with uniform noise on [0,1)
F = torch.Tensor({{1, 1}, {2, 2}}) -- 2x2 literal tensor built from a nested Lua table
print(A,"\n")
print(B,"\n")
print(C,"\n")
print(D,"\n")
print(E,"\n")
print(F)
-- by default Tensor() gives you a "real" Tensor, and you can set whether "real" defaults to float or double.
-- if you want to explicitly pick one, there are also specialized constructors
A = torch.FloatTensor(3, 3)
print(A,"\n")
B = torch.LongTensor(3, 3) -- N.B. LongTensors hold integers and are very important; we use them to store indices
print(B,"\n")
-- some important ways to get Tensor metadata
A = torch.randn(2,3)
print(A:dim(),"\n") -- number of dimensions
print(A:size(1),"\n") -- size along 1st dimension; can do any (existing) dimension e.g. A:size(2)
print(A:size()) -- gives a data structure with sizes of ALL dimensions; not actually that useful
print(A:nElement(),"\n") -- total number of elements
print(A:isContiguous()) -- does Tensor address a contiguous block of memory
-- can "view" a tensor in a different shape without doing any copy
a = torch.range(1,6) -- numbers 1 thru 6
print(a,"\n")
A = a:view(2,3) -- the ':' notation implicitly adds 'self' to a function call (when defined on objects)
print(A)
-- note view() reshapes along rows (like C and numpy), not along columns (like fortran and R)
B = A:view(3,2)
print(B,"\n")
-- note a, A, and B address the same memory!
B:zero() -- zeroes out a tensor in place...
print(a) -- ...so 'a' now prints as all zeros, even though we only zeroed B
-- index notation allows you to index along the first dimension
A = torch.range(1,6):view(2,3)
firstRow = A[1]
print(A,"\n")
print(firstRow)
-- this does no memory copy! firstRow shares storage with A
-- select() allows you to index along any dimension
firstCol = A:select(2,1) -- select()'s first argument is the desired dimension, second the index
print(firstCol)
-- also does no memory copy!
-- instead of accessing a single index, can narrow a Tensor along a chosen dimension
firstRow = A:narrow(1,1,1) -- arguments are dim, startIdx, length along dim to extract
last2Cols = A:narrow(2,2,2)
print(firstRow,"\n")
print(last2Cols)
-- this also addresses SAME memory as in original tensor,
-- so zeroing the narrowed view zeroes those entries of A too:
last2Cols:zero()
print(A)
-- while narrow() calls can be chained, can also use sub() to narrow along first 4 dimensions
A = torch.range(1,6):view(2,3)
firstRow = A:sub(1,1) -- arguments are start and stop idx (inclusive) for each dimension (up to 4)
last2Cols = A:sub(1,2,2,3) -- using start and stop indices for first two dimensions here
bottomRight = A:sub(2,2,3,3)
print(firstRow,"\n")
print(last2Cols,"\n")
print(bottomRight,"\n")
-- as above, this addresses same memory as original
-- instead of using sub() and narrow(), can also specify ranges by indexing with tables
firstRow = A[{{1,1},{}}] -- expects table of range-tables, 1 for each dimension; empty table means everything
last2Cols = A[{{},{2,3}}] -- note similarity to sub()
bottomRight = A[{{2,2},{3,3}}]
print(firstRow,"\n")
print(last2Cols,"\n")
print(bottomRight,"\n")
-- we can select non-contiguous items along the first dimension using index()
A = torch.range(1,9):view(3,3)
idxs = torch.LongTensor({1,3}) -- indices are often required to be stored in LongTensors
firstAndThirdRows = A:index(1,idxs) -- first argument is the dimension
print(A,"\n")
print(firstAndThirdRows,"\n")
--N.B. index() does a memory copy! (unlike narrow/select/sub above)
firstAndThirdRows:zero() -- so zeroing the result leaves A untouched
print(A)
-- can also update a matrix sparsely with indexAdd(): here the two rows of U
-- are added into rows 1 and 3 of A (the rows listed in idxs)
A = torch.zeros(3,3)
idxs = torch.LongTensor({1,3})
U = torch.randn(2,3)
A:indexAdd(1,idxs,U) -- U must be of dimension idxs:size(1) x A:size(2)
print(A)
-- there's also indexFill() and indexCopy(); see the documentation!
-- for most (mathematical) operations defined on tensors, you will have a choice between allocating new memory for
-- the result of the operation, or placing the result in some already-allocated tensor.
-- for example, let's consider taking the element-wise absolute value of a tensor A
A = torch.randn(3,3)
-- if we want to allocate a NEW tensor B s.t. B = abs(A), we do the following
B = torch.abs(A) -- in general, using torch.f to call a function f on a tensor will allocate new memory
-- let's make sure A has not changed
print(A,"\n")
print(B,"\n")
-- suppose instead we have some tensor C lying around that we want to use to store abs(A)
C = torch.Tensor(3,3)
-- we can use C to store abs(A) as follows
C:abs(A) -- recall the ':' notation is short-hand for passing 'self' to a function defined on an object
-- in general, calling X:f(args) will use X's memory to store the result of f(args)
print(C)
-- often it is convenient to use a tensor to store its own result
A:abs() -- no argument: A is both input and output (fully in-place)
-- now A has changed
print(A)
-- the in-place/copying distinction is important for 2 reasons:
-- 1) doing in-place operations is generally much faster than allocating memory repeatedly (e.g., in a loop)
-- 2) on the other hand, easy to mess up your data by accidentally doing things in-place
-- many useful elementwise operations are defined, and (as above), can be used in-place or not
-- (the torch.f forms below allocate results that we immediately discard; shown only for illustration)
torch.sqrt(A)
A:sqrt()
torch.tanh(A)
A:tanh()
-- can add or multiply by constant scalars (all in-place on A)
A:add(0.5)
A:mul(2.6)
A:div(1)
-- can take sum, mean, stddev of rows or columns as follows
A = torch.randn(2,3)
colSums = A:sum(1) -- sum along first dimension; can also do A:mean(1), A:std(1), etc
rowSums = A:sum(2) -- sum along second dimension; can also do A:mean(2), A:std(2), etc
-- torch combines max and argmax()
a = torch.range(2,6)
maxval, argmax = a:max(1) -- argument specifies dimension; N.B. both results are Tensors, not plain numbers
print(maxval, argmax)
-- can also take min()
-- adding and multiplying tensors
A = torch.randn(2,3)
B = torch.randn(2,3)
A:add(B) -- puts A+B in A; if want new memory, do torch.add(A,B)
B:cmul(A) -- puts ELEMENTWISE multiplication of A and B in B
B:cdiv(A) -- puts ELEMENTWISE division B/A in B
-- matrix multiplication
Ans = torch.Tensor(2,2) -- we'll use this to store an answer
-- computes (A B^T) and puts it in Ans
Ans:mm(A,B:t()) -- N.B. B:t() transposes B (no copy; it's a strided view)
-- dot products
dotprod = A:dot(B) -- note, A and B don't need to be vectors (that is, they can have dim > 1)
-- matrix-vector products
mvAns = torch.Tensor(2) -- stores mv-prod answer
v = torch.randn(3) -- 1 dimensional, so a vector
mvAns:mv(A,v) -- note could also have done mvAns:view(2,1):mm(A,v:view(3,1))
-- instead of overwriting memory w/ a result, you may want to add it to something already computed
u = torch.ones(3)
v = torch.Tensor(3):fill(2.2)
w = torch.Tensor(3):fill(2)
c = 3
-- compute u = u + c * (v .* w), where .* is elementwise multiplication
-- (here: u = 1 + 3*2.2*2 = 14.2 in every entry)
u:addcmul(c,v,w)
print(u,"\n")
-- N.B. can also do addcdiv(), which will often be very handy!
-- add-style matrix multiplication
Ans = torch.ones(3,3)
A = torch.eye(3) -- torch.eye makes an identity matrix
B = torch.eye(3):mul(2)
Ans:addmm(c,A,B) -- N.B. many more options here; see the documentation!
print(Ans)
-- possible to use infix notation like in numpy/matlab, but it always does a copy, so try to avoid it!
A = torch.randn(3,3)
B = torch.randn(3,3)
A = A + B
C = A * B
-- etc
Let's consider the quadratic function $$f(\mathbf{x}) = \frac{1}{2} \mathbf{x}^{T} \mathbf{A} \mathbf{x} + \mathbf{b}^T \mathbf{x} \,$$, where $\mathbf{A} \,$ is symmetric and $\mathbf{x} \in \mathbb{R}^D$.
The gradient in this case is $$ \nabla_{\mathbf{x}}(f) = \langle \frac{\partial f}{\partial x_1}, \ldots, \frac{\partial f}{\partial x_D} \rangle = \mathbf{A} \mathbf{x} + \mathbf{b} \,$$
Suppose that we compute the gradient above analytically (in code). We can use finite differences to check that our implementation is bug-free. First, define the masking vector $\mathbf{m}^j = \epsilon \cdot \mathbf{\delta}(j)$, where $\mathbf{\delta}(j) \in \mathbb{R}^D$ is the $j$-th standard basis (one-hot) vector, so $\mathbf{m}^j \in \mathbb{R}^D$ is zero everywhere except for $\epsilon$ in its $j$-th coordinate, and $\epsilon$ is some very small scalar value (e.g., 0.00001).
If our computed gradient is correct, we expect $$ \frac{\partial f}{\partial x_j} \approx \frac{f(\mathbf{x} + \mathbf{m}^j) - f(\mathbf{x} - \mathbf{m}^j)}{2\epsilon} \, $$ The term on the RHS of the equation above is the ``central'' finite difference wrt $x_j$.
We'll implement $f$, its gradient, and a finite difference checker below!
(P.S. I have no idea how to prevent those bars to the right of the equations from showing up, but ignore them...)
-- let's define some global memory we'll update, and some fixed, global parameters
buf = nil -- scratch buffer reused across calls to f (lazily allocated below)
grad = nil -- scratch buffer reused across calls to dfdx
torch.manualSeed(287) -- fix the RNG so the demo is reproducible
D = 3 -- dimensionality of x
A = torch.randn(D,D)
-- ensure symmetric (note this does a memory copy!)
A = A + A:t()
b = torch.randn(D)
x = torch.randn(D)
-- implementation of f(x) = 0.5 * x^T A x + b^T x (matches the quadratic defined above)
function f(x,A,b)
-- allocate the shared buffer once, on first call, to avoid repeated allocation
if buf == nil then
buf = torch.Tensor()
end
buf:resize(A:size(1))
-- first do Ax
buf:mv(A,x)
-- then 0.5 * x^T (Ax) + b^T x
return buf:dot(x)*0.5 + b:dot(x)
end
-- implementation of \nabla_x(f) = Ax + b
-- N.B. returns the shared global 'grad' buffer, so the result is overwritten
-- by the next call to dfdx; callers must not hold onto it across calls
function dfdx(x,A,b)
-- allocate the shared buffer once, on first call
if grad == nil then
grad = torch.Tensor()
end
grad:resizeAs(x)
grad:mv(A,x) -- grad = Ax
grad:add(b) -- grad = Ax + b
return grad
end
-- Check an analytic gradient implementation against central finite differences:
-- df/dx_j should be approximately (f(x + eps*e_j) - f(x - eps*e_j)) / (2*eps).
-- Arguments: f(x,A,b) scalar-valued function, dfdx(x,A,b) its analytic gradient,
-- x a 1-d Tensor (restored to its original values on exit), A, b the fixed parameters.
-- Raises (via assert) if any coordinate disagrees by more than 1e-4.
function finiteDiffChecker(f,dfdx,x,A,b)
-- first let's compute the gradient at our current point
local grad = dfdx(x,A,b)
-- now let's check it with finite differences
local eps = 1e-5 -- use this for computing finite diffs
local xcopy = x:clone() -- xcopy holds x + eps*e_j; x itself holds x - eps*e_j
print("grad", " finite-difference")
for j = 1, grad:size(1) do
-- perturb coordinate j in opposite directions
xcopy[j] = xcopy[j] + eps
x[j] = x[j] - eps
-- form finite difference: (f(x+eps,A,b) - f(x-eps,A,b))/(2*eps)
local diff = f(xcopy,A,b) - f(x,A,b)
local finiteDiff = diff/(2*eps)
-- now compare to our analytic gradient
print(grad[j], finiteDiff)
-- grad[j] is a plain Lua number (indexing a 1-d Tensor yields a number),
-- so use math.abs; torch.abs expects a Tensor argument
assert(math.abs(grad[j]-finiteDiff) <= 1e-4)
-- reset x and xcopy so the next coordinate starts from the original point
xcopy[j] = xcopy[j] - eps
x[j] = x[j] + eps
end
end
-- let's check our gradients!
finiteDiffChecker(f,dfdx,x,A,b)
-- hdf5 is a format that lets us write and read tensors in both torch and numpy
-- you should install torch-hdf5 by following the instructions at https://github.com/deepmind/torch-hdf5
-- here are some simple examples of using torch-hdf5 to read and write
require 'hdf5'
-- writing ('w' truncates/creates the file)
myFile = hdf5.open('myh5file.h5', 'w')
myFile:write('dataset1', torch.rand(5, 5))
-- can write multiple 'datasets' to the same .h5 file, each under its own name
myFile:write('dataset2', torch.randn(3))
-- etc
myFile:close() -- always close handles so data is flushed to disk
-- reading ('r' opens read-only); :all() loads the entire dataset into memory
myFile = hdf5.open('myh5file.h5', 'r')
dataset1 = myFile:read('dataset1'):all()
dataset2 = myFile:read('dataset2'):all()
myFile:close()