Variables and Printing

In [ ]:
--assignment and printing 

a, b = 24, "tacos" -- can assign tuple-style, like in python 
c = 'please' -- can enclose string literals in single or double quotes
print(a, b, c, "\n")
In [ ]:
-- string concatenation

d = b .. ', ' .. c
print(d, "\n")
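
-- (extra aside, not in the original cell: '..' also coerces numbers to strings;
--  this assumes a and b from the first cell above)
print(a .. " " .. b, "\n") -- prints "24 tacos"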

Scalar Math

In [ ]:
-- syntax similar to MATLAB
print(2*a, a^2, a%2, "\n")
In [ ]:
-- note that all numbers are implicitly floats/doubles!
print(a/7, "\n")
In [ ]:
-- if you want to ensure you get something integral, use math.ceil or math.floor
print(math.floor(a/7), math.ceil(a/7), "\n")
In [ ]:
-- some other useful math functions
print(math.min(1, 22, 44), math.max(1, 22, 44), "\n")

Control Flow

In [ ]:
-- while loops are enclosed in while-do-end blocks
i = 1
while i < 3 do
    print(i)
    i = i + 1 -- N.B. no 'i += 1' or 'i++' syntax in Lua
end
In [ ]:
-- for-loops iterate over a range of numbers, INCLUSIVE!
for i = 3, 5 do
    print(i)
end
In [ ]:
-- like in python, you can specify the step size with a 3rd loop argument
for i = 10, 1, -4 do
    print(i)
end
In [ ]:
-- conditional statements go in if-then-elseif-else-end blocks
val = 24

if val == 0 then
    print("zero!")
elseif val%2 == 0 then
    print("even and nonzero!")
elseif val ~= 13 then           -- N.B. Lua uses '~=' to mean '!='; also works for strings!
    print("odd and not 13!")
else
    print("everything else!")
end
In [ ]:
-- lua allows the 'break' keyword
for i = 1, 3 do
    if i % 2 == 0 then
        break
    end
    print(i)
end

-- but it doesn't have 'continue'; a common workaround is sketched below
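
-- (extra sketch, not from the original notebook: the usual 'continue' workaround wraps the
--  loop body in repeat ... until true, so that 'break' just skips to the next iteration)
for i = 1, 5 do
    repeat
        if i % 2 == 0 then break end -- acts like 'continue'
        print(i)
    until true
end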

Truth and Falsity

In [ ]:
-- nil and false evaluate to false
a, b =  nil, false

-- everything else evaluates to true
c, d = "taco", 0

if a or b then
    print("won't get here: both a and b are falsey")
elseif c and d then
    print("will get here: c and d are both truthy (even though d is 0)!")
end
In [ ]:
-- 'and' and 'or' short-circuit and return one of their operands; this allows a 'ternary if' as follows:
val2 = a and 1 or 2 -- a is falsey, so we get 2
print(val2, "\n")

val3 = c and 3 or 4 --  c is truthy, so we get 3
print(val3, "\n")
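
-- (extra caveat, not in the original: the 'x and y or z' idiom misfires when y is false/nil)
bad = c and nil or 4 -- c is truthy, but (c and nil) is nil, so we get 4 anyway
print(bad, "\n")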


In [ ]:
-- local vs global variables

var = 22 -- global

function f1()
    local var = 33 -- N.B. local variables generally lead to faster code! 
    return var + 1
end

print(f1(), "\n")

function f2()
    return var + 1
end

print(f2(), "\n")
In [ ]:
-- default and extra arguments

function encodeDigits(a, b, c)
    local a = a or 0 -- common convention for specifying default args
    local b = b or 0
    local c = c or 0
    assert(a >= 0 and a < 10)
    assert(b >= 0 and b < 10)
    assert(c >= 0 and c < 10)    
    return a*1 + b*10 + c*100
end

print(encodeDigits(1, 2, 3),"\n") -- no defaults used
print(encodeDigits(2),"\n") -- defaults for b and c used
print(encodeDigits(nil, 2),"\n") -- defaults for a and c used
print(encodeDigits(), "\n") -- all defaults used
print(encodeDigits(1, 2, 3, 4),"\n") -- 4th argument ignored
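
-- (extra aside, not in the original: for a truly variable number of arguments, use '...';
--  sumAll is a hypothetical helper, just for illustration)
function sumAll(...)
    local s = 0
    for _, v in ipairs({...}) do s = s + v end
    return s
end
print(sumAll(1, 2, 3), "\n") -- prints 6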
In [ ]:
-- returning multiple values

function divWithRemainder(a, b)
    return math.floor(a/b), a%b
end

d, r = divWithRemainder(10, 3)
print(d, r, "\n")

-- if you attempt to place multiple values in a single variable, lua just forgets the values after the first
d = divWithRemainder(10, 3)
print(d) -- N.B. you don't get a tuple like in python; just 3
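
-- (extra aside, not in the original: to capture all returned values, wrap the call in a table constructor)
both = {divWithRemainder(10, 3)}
print(both[1], both[2], "\n") -- prints 3 and 1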

-- (function stuff outside the scope of this tutorial: functions are first class objects, closures)

Tables (more or less the only native data-structure provided by Lua)

Tables as Dictionaries

In [ ]:
-- tables can be used as hash-based associative arrays (like python dictionaries)
t1 = {} -- construct an empty table
t1["one"] = 1
t1["two"] = 2
t1[3] = "three"
print(t1, "\n")

t2 = {["one"]=1, ["two"]=2, [3]="three"} -- constructing a table literal
print(t2, "\n")
In [ ]:
-- can access string attributes either with brackets, or with dot notation
print(t2["one"],, t2[3], "\n")
In [ ]:
-- iterating over key, value pairs
for k,v in pairs(t1) do
    print(k, v)
end
In [ ]:
-- remove elements from dictionaries by setting to nil
t1["one"] = nil

Tables as (ordered) arrays

In [ ]:
-- when a table uses only integer keys 1..n, it can also function as an array!
-- N.B. Tables (and tensors) are 1-indexed!!!
arr = {} -- construct an empty array
arr[1] = "one"
arr[2] = "two"
arr[3] = "three"

arr2 = {"one", "two", "three"} -- construct an array literal
In [ ]:
-- can get the length of an array by prepending it with '#'
print(#arr2, "\n") -- prints 3

-- N.B. '#' only works with array-style tables (and not with dictionary-style tables)
  -- If you want to get the size of a dictionary in constant time, you need to store it somewhere; gross!
ugh = {["one"]=1, ["two"]=2}
print(#ugh,"\n") -- prints 0; misleading!
In [ ]:
-- instead of using integer keys to index, you can also append to a table as follows
arr3 = {}
table.insert(arr3, "one") -- equivalent to arr3[#arr3+1] = "one"
table.insert(arr3, "two")
In [ ]:
-- can iterate over an array in order as follows
for i, el in ipairs(arr2) do -- ipairs() is like enumerate() in python
    print(i, el)
end
In [ ]:
-- to remove elements from array, use table.remove (which is inefficient)
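
-- (extra sketch, not in the original: table.remove shifts the later elements down, hence the inefficiency;
--  arr4 is a hypothetical example table)
arr4 = {"one", "two", "three"}
table.remove(arr4, 1) -- removes "one"; arr4 is now {"two", "three"}
print(#arr4, arr4[1], "\n")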


Tensor Basics

In [ ]:
--[[ Tensors are multi-dimensional generalizations of arrays/matrices, and are the primary data structure provided
     by Torch (just as arrays are the primary data structure provided by numpy). Tensors are great, and anytime
     you can use them you probably should.

     Also see the Torch documentation for documentation on Tensor objects,
     and on the mathematical operations defined on Tensors.
--]]

-- here are some ways of constructing Tensors (of different sizes and dimensions)
A = torch.Tensor(3, 3) -- an empty 3x3 Tensor (initialized with garbage)
B = torch.zeros(3, 3, 2) -- 3x3x2 Tensor initialized with zeros
C = torch.ones(3, 1, 3)  -- 3x1x3 Tensor initialized with ones
D = torch.randn(2) -- 2-vector (still a Tensor) initialized with standard gaussian noise
E = torch.rand(1, 1, 1, 1)  -- 1x1x1x1 Tensor initialized with uniform noise
F = torch.Tensor({{1, 1}, {2, 2}}) -- 2x2 literal tensor

In [ ]:
-- by default Tensor() gives you a "real" Tensor, and you can set whether "real" defaults to float or double.
-- if you want to explicitly pick one, there are also specialized constructors
A = torch.FloatTensor(3, 3)
B = torch.LongTensor(3, 3) -- N.B. LongTensors hold integers and are very important; we use them to store indices
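
-- (extra aside, not in the original: the default type can be set globally, e.g.)
torch.setdefaulttensortype('torch.FloatTensor')  -- now torch.Tensor() gives FloatTensors
torch.setdefaulttensortype('torch.DoubleTensor') -- back to the usual default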
In [ ]:
-- some important ways to get Tensor metadata
A = torch.randn(2,3)
print(A:dim(),"\n") -- number of dimensions
print(A:size(1),"\n") -- size along 1st dimension; can do any (existing) dimension e.g. A:size(2)
print(A:size()) -- gives a data structure with sizes of ALL dimensions; not actually that useful
print(A:nElement(),"\n") -- total number of elements
print(A:isContiguous()) -- whether the Tensor addresses a contiguous block of memory

Views on Tensors

In [ ]:
-- can "view" a tensor in a different shape without doing any copy
a = torch.range(1,6) -- numbers 1 thru 6
A = a:view(2,3) -- the ':' notation implicitly adds 'self' to a function call (when defined on objects)

-- note view() reshapes along rows (like C and numpy), not along columns (like fortran and R) 
B = A:view(3,2)

-- note a, A, and B address the same memory!
B:zero() -- zeroes out a tensor
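
-- (extra check, not in the original, assuming the lines above have run)
print(a, "\n") -- a (and A) are now all zeros too, since they share storage with B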

Accessing Sub-Tensors

In [ ]:
-- index notation allows you to index along the first dimension
A = torch.range(1,6):view(2,3)
firstRow = A[1]

-- this does no memory copy!
In [ ]:
-- select() allows you to index along any dimension
firstCol = A:select(2,1) -- select()'s first argument is the desired dimension
-- also does no memory copy!
In [ ]:
-- instead of accessing a single index, can narrow a Tensor along a chosen dimension
firstRow = A:narrow(1,1,1) -- arguments are dim, startIdx, length along dim to extract
last2Cols = A:narrow(2,2,2)

-- this also addresses SAME memory as in original tensor
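
-- (extra check, not in the original: writing through a narrowed view modifies A as well)
last2Cols:fill(0)
print(A, "\n") -- the last two columns of A are now zero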
In [ ]:
-- while narrow() calls can be chained, can also use sub() to narrow along first 4 dimensions
A = torch.range(1,6):view(2,3)
firstRow = A:sub(1,1) -- arguments are start and stop idx (inclusive) for each dimension (up to 4)
last2Cols = A:sub(1,2,2,3) -- using start and stop indices for first two dimensions here
bottomRight = A:sub(2,2,3,3)


-- as above, this addresses same memory as original
In [ ]:
-- instead of using sub() and narrow(), can also specify ranges by indexing with tables
firstRow = A[{{1,1},{}}] -- expects table of range-tables, 1 for each dimension; empty table means everything
last2Cols = A[{{},{2,3}}] -- note similarity to sub()
bottomRight = A[{{2,2},{3,3}}]


Sparse Indexing

In [ ]:
-- we can select non-contiguous items along the first dimension using index()
A = torch.range(1,9):view(3,3)
idxs = torch.LongTensor({1,3}) -- indices are often required to be stored in LongTensors
firstAndThirdRows = A:index(1,idxs) -- first argument is the dimension

--N.B. index() does a memory copy!
In [ ]:
-- can also update a matrix sparsely with indexAdd()
A = torch.zeros(3,3)
idxs = torch.LongTensor({1,3})
U = torch.randn(2,3)
A:indexAdd(1,idxs,U) -- U must be of dimension idxs:size(1) x A:size(2)
-- there's also indexFill() and indexCopy(); see the documentation!
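
-- (extra sketch, not in the original: indexFill() sets the selected rows to a constant)
A:indexFill(1, idxs, -1) -- fills rows 1 and 3 of A with -1
print(A, "\n")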

In-place Operations vs. Copying Operations (Very Important!)

In [ ]:
-- for most (mathematical) operations defined on tensors, you will have a choice between allocating new memory for
-- the result of the operation, or placing the result in some already-allocated tensor.
-- for example, let's consider taking the element-wise absolute value of a tensor A
A = torch.randn(3,3)

-- if we want to allocate a NEW tensor B s.t. B = abs(A), we do the following
B = torch.abs(A) -- in general, using torch.f to call a function f on a tensor will allocate new memory

-- let's make sure A has not changed
print(A, "\n")
In [ ]:
-- suppose instead we have some tensor C lying around that we want to use to store abs(A)
C = torch.Tensor(3,3)
-- we can use C to store abs(A) as follows
C:abs(A) -- recall the ':' notation is short-hand for passing 'self' to a function defined on an object
         -- in general, calling X:f(args) will use X's memory to store the result of f(args)
In [ ]:
-- often it is convenient to use a tensor to store its own result
A:abs() -- equivalent to A:abs(A); takes the absolute value of A in place
-- now A has changed
print(A, "\n")
In [ ]:
-- the in-place/copying distinction is important for 2 reasons:
-- 1) doing in-place operations is generally much faster than allocating memory repeatedly (e.g., in a loop)
-- 2) on the other hand, it's easy to mess up your data by accidentally doing things in-place

Element-wise mathematical operations

In [ ]:
-- many useful elementwise operations are defined, and (as above) can be used in-place or not;
-- for instance, you can add or multiply by constant scalars (see the sketch below)
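
-- (extra sketch, not in the original cell: in-place and copying variants of scalar ops)
A = torch.randn(2,2)
A:add(3)            -- adds 3 to every element of A, in place
B = torch.mul(A, 2) -- allocates a new tensor B = 2*A
print(A, B, "\n")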

Row or Column-wise Operations

In [ ]:
-- can take sum, mean, stddev of rows or columns as follows
A = torch.randn(2,3)
colSums = A:sum(1) -- sum along first dimension; can also do A:mean(1), A:std(1), etc
rowSums = A:sum(2) -- sum along second dimension; can also do A:mean(2), A:std(2), etc
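
-- (extra check, not in the original: note the resulting shapes)
print(colSums, "\n") -- 1x3
print(rowSums, "\n") -- 2x1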
In [ ]:
-- torch combines max() and argmax() into a single call
a = torch.range(2,6)
maxval, argmax = a:max(1) -- argument specifies dimension
print(maxval, argmax)
-- can also take min()

Linear Algebra

In [ ]:
-- adding and multiplying tensors
A = torch.randn(2,3)
B = torch.randn(2,3)
A:add(B) -- puts A+B in A; if want new memory, do torch.add(A,B)
B:cmul(A) -- puts ELEMENTWISE multiplication of A and B in B
B:cdiv(A) -- puts ELEMENTWISE division B/A in B
In [ ]:
-- matrix multiplication
Ans = torch.Tensor(2,2) -- we'll use this to store an answer
-- computes (A B^T) and puts it in Ans
Ans:mm(A,B:t())  -- N.B. B:t() transposes B

-- dot products
dotprod = A:dot(B) -- note, A and B don't need to be vectors (that is, they can have dim > 1)

-- matrix-vector products
mvAns = torch.Tensor(2) -- stores mv-prod answer
v = torch.randn(3) -- 1 dimensional, so a vector
mvAns:mv(A,v) -- note could also have done mvAns:view(2,1):mm(A,v:view(3,1))

"Add"-style Linear Algebra

In [ ]:
-- instead of overwriting memory w/ a result, you may want to add it to something already computed
u = torch.ones(3)
v = torch.Tensor(3):fill(2.2)
w = torch.Tensor(3):fill(2)
c = 3
-- compute u = u + c * (v .* w), where .* is elementwise multiplication
u:addcmul(c, v, w)
print(u, "\n")
-- N.B. can also do addcdiv(), which will often be very handy!
In [ ]:
-- add-style matrix multiplication
Ans = torch.ones(3,3)
A = torch.eye(3) -- torch.eye makes an identity matrix
B = torch.eye(3):mul(2)
Ans:addmm(c,A,B) -- N.B. many more options here; see the documentation!
In [ ]:
-- possible to use infix notation like in numpy/matlab, but it always does a copy, so try to avoid it!
A = torch.randn(3,3)
B = torch.randn(3,3)
A = A + B
C = A * B
-- etc



To tie everything together, let's implement finite difference gradient-checking for a simple function

Let's consider the quadratic function $$f(\mathbf{x}) = \frac{1}{2} \mathbf{x}^{T} \mathbf{A} \mathbf{x} + \mathbf{b}^T \mathbf{x},$$ where $\mathbf{A}$ is symmetric and $\mathbf{x}, \mathbf{b} \in \mathbb{R}^D$.

The gradient in this case is $$ \nabla_{\mathbf{x}} f = \left\langle \frac{\partial f}{\partial x_1}, \ldots, \frac{\partial f}{\partial x_D} \right\rangle = \mathbf{A} \mathbf{x} + \mathbf{b}, $$ where we have used the symmetry of $\mathbf{A}$ (in general, $\nabla_{\mathbf{x}} \tfrac{1}{2}\mathbf{x}^T \mathbf{A} \mathbf{x} = \tfrac{1}{2}(\mathbf{A} + \mathbf{A}^T)\mathbf{x}$).

Suppose that we compute the gradient above analytically (in code). We can use finite differences to check that our implementation is bug-free. First, define the masking vector $\mathbf{m}^j = \epsilon \cdot \mathbf{\delta}(j)$, where $\mathbf{\delta}(j) \in \mathbb{R}^D$ is the one-hot vector with a 1 in position $j$ (so $\mathbf{m}^j \in \mathbb{R}^D$), and $\epsilon$ is some very small scalar value (e.g., 0.00001).

If our computed gradient is correct, we expect $$ \frac{\partial f}{\partial x_j} \approx \frac{f(\mathbf{x} + \mathbf{m}^j) - f(\mathbf{x} - \mathbf{m}^j)}{2\epsilon}. $$ The term on the RHS of the equation above is the "central" finite difference wrt $x_j$.

We'll implement $f$, its gradient, and a finite difference checker below!

(P.S. I have no idea how to prevent those bars to the right of the equations from showing up, but ignore them...)

In [ ]:
-- let's define some global memory we'll update, and some fixed, global parameters
buf = nil
grad = nil

D = 3 -- dimensionality of x
A = torch.randn(D,D)
-- ensure symmetric (note this does a memory copy!)
A = A + A:t()
b = torch.randn(D)
x = torch.randn(D)
In [ ]:
-- implementation of f(x) = 1/2 x^T A x + b^T x
function f(x,A,b)
  if buf == nil then
    buf = torch.Tensor()
  end
  buf:resize(x:size(1))
  -- first do Ax
  buf:mv(A,x)
  return buf:dot(x)*0.5 + b:dot(x)
end
In [ ]:
-- implementation of \nabla_x(f) = Ax + b
function dfdx(x,A,b)
  if grad == nil then
    grad = torch.Tensor()
  end
  grad:resize(x:size(1))
  grad:mv(A,x) -- grad = Ax
  grad:add(b)  -- grad = Ax + b
  return grad
end
In [ ]:
function finiteDiffChecker(f,dfdx,x,A,b)
  -- first let's compute the gradient at our current point
  local grad = dfdx(x,A,b)
  -- now let's check it with finite differences
  local eps = 1e-5 -- use this for computing finite diffs
  local xcopy = x:clone()
  print("grad", "         finite-difference")
  for j = 1, grad:size(1) do
    -- perturb the j'th coordinate (xcopy[j] up by eps, x[j] down by eps)
    xcopy[j] = xcopy[j] + eps
    x[j] = x[j] - eps
    -- form finite difference: (f(x+eps,A,b) - f(x-eps,A,b))/(2*eps)
    local diff = f(xcopy,A,b) - f(x,A,b)
    local finiteDiff = diff/(2*eps)
    -- now compare to our analytic gradient
    print(grad[j], finiteDiff)
    assert(math.abs(grad[j]-finiteDiff) <= 1e-4) -- grad[j] and finiteDiff are plain numbers, so use math.abs
    -- reset x and xcopy
    xcopy[j] = xcopy[j] - eps
    x[j] = x[j] + eps
  end
end
In [ ]:
-- let's check our gradients!
finiteDiffChecker(f,dfdx,x,A,b)

In [ ]:
-- hdf5 is a format that lets us write and read tensors in both torch and numpy
-- you should install the torch-hdf5 package first (see its installation instructions)
-- here are some simple examples of using torch-hdf5 to read and write

require 'hdf5'

-- writing
myFile ='myh5file.h5', 'w')
myFile:write('dataset1', torch.rand(5, 5))
-- can write multiple 'datasets' to the same .h5 file
myFile:write('dataset2', torch.randn(3))
-- etc
myFile:close()

-- reading
myFile ='myh5file.h5', 'r')
dataset1 = myFile:read('dataset1'):all()
dataset2 = myFile:read('dataset2'):all()
myFile:close()

Final Note

  • Do NOT do your assignments in an iTorch notebook; they're slow and annoying.