# Advanced Python Workshop - Lecture Notes¶

May 24, 2013

In [2]:
# problem 1
x = 1
y = x
x = 2
print x, y

2 1

In [1]:
# problem 2
x = [1, 2]
y = [x, 5]
x.append(3)
print y

[[1, 2, 3], 5]


## Functions¶

In [3]:
def square(x):
return x*x
print square(4)

16

In [4]:
f = square
print f(4)

16

In [5]:
def fxy(f, x, y):
return f(x) + f(y)

print fxy(square, 3, 4)

25

In [6]:
f = lambda x: x*x
print f(3)

9

In [7]:
print fxy(lambda x: x*x*x, 3, 4)

91

In [8]:
x = ['python', 'perl',
'java', 'c',
print sorted(x)

['c', 'haskell', 'java', 'perl', 'python', 'ruby']

In [9]:
print sorted(x, key=len)

['c', 'perl', 'java', 'ruby', 'python', 'haskell']


Default Arguments

In [10]:
def inc(x, amount=1):
return x+amount

print inc(5)

6

In [11]:
print inc(5, 4)

9

In [13]:
print inc(x=5, amount=4)

9


Lets find out when is default value computed.

In [15]:
def f(x):
print "f is called with", x
return x

print "before defining inc"

def inc(x, amount=f(1)):
return x + amount

print "after defining inc"
print inc(5)

before defining inc
f is called with 1
after defining inc
6


# Iterators and Generators¶

In [17]:
for a in [1, 2, 3, 4]:
print a

1
2
3
4

In [18]:
for a in (1, 2, 3, 4):
print a

1
2
3
4

In [19]:
for k in {"a": 1, "b": 2}:
print k

a
b

In [20]:
for c in "hello":
print c

h
e
l
l
o

In [21]:
",".join(["a", "b", "c"])

Out[21]:
'a,b,c'
In [22]:
",".join({"a": 1, "b": 2})

Out[22]:
'a,b'
In [23]:
",".join("hello")

Out[23]:
'h,e,l,l,o'
In [24]:
max([1, 2, 3, 4])

Out[24]:
4
In [25]:
max("hello")

Out[25]:
'o'
In [26]:
max({"a": 1, "b": 2})

Out[26]:
'b'

Lets try to understand how iteration works.

In [27]:
x = iter([1, 2, 3, 4])

In [29]:
x.next()

Out[29]:
1
In [30]:
x.next()

Out[30]:
2
In [31]:
x.next()

Out[31]:
3
In [32]:
x.next()

Out[32]:
4
In [33]:
x.next()

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-33-e05f366da090> in <module>()
----> 1 x.next()

StopIteration: 
In [34]:
x = iter("abc")
x.next()

Out[34]:
'a'
In [35]:
x.next()

Out[35]:
'b'
In [36]:
x.next()

Out[36]:
'c'
In [37]:
x.next()

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-37-e05f366da090> in <module>()
----> 1 x.next()

StopIteration: 
In [41]:
class yrange:
def __init__(self, n):
self.i = 0
self.n = n

def __iter__(self):
return self

def next(self):
i = self.i
if i < self.n:
self.i = i + 1
return i
else:
raise StopIteration()

y = yrange(5)
for a in y:
print a

0
1
2
3
4


Lets try to see how for loop is behind the scenes.

for a in x:
print a



Translate this in to while loop.

it = iter(x)
while True:
try:
a = it.next()
except StopIteration:
break
print a
In [44]:
# [1, 2, 3, 4] is an iterable object.
# x = iter([1, 2, 3, 4]) gives an iterator.
# next() method can be called on an iterator.

y = yrange(5)
print list(y)
print list(y)

[0, 1, 2, 3, 4]
[]

In [46]:
class zrange:
def __init__(self, n):
self.n = n
def __iter__(self):
return yrange(self.n)

z = zrange(5)
print list(z)
print list(z)

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]


## Generators¶

In [50]:
def yrange(n):
i = 0
while i < n:
yield i
i += 1

y = yrange(3)
print y.next()

0

In [51]:
y.next()

Out[51]:
1
In [52]:
y.next()

Out[52]:
2
In [53]:
y.next()

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-53-75a92ee8313a> in <module>()
----> 1 y.next()

StopIteration: 
In [55]:
def f():
print "begin f"
yield 1
print "after yielding 1"
yield 2
print "end"

a = f()
print a

<generator object f at 0x10270f050>

In [56]:
a.next()

begin f

Out[56]:
1
In [57]:
a.next()

after yielding 1

Out[57]:
2
In [58]:
a.next()

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-58-aa817a57a973> in <module>()
----> 1 a.next()

StopIteration: 
end

In [59]:
max(yrange(4))

Out[59]:
3
In [60]:
sum(yrange(4))

Out[60]:
6
In [61]:
def squares(numbers):
for n in numbers:
yield n*n

print sum(squares(xrange(1000000)))

333332833333500000

In [62]:
%%file a.txt
1
2
3
4
5

Writing a.txt

In [63]:
def toint(strings):
for s in strings:
yield int(s)

print sum(toint(open("a.txt")))

15

In [64]:
print sum(squares(toint(open("a.txt"))))

55

In [66]:
# the regular way is
result = 0
for line in open("a.txt"):
n = int(line)
result += n
print result

15


Problem: Write a function joiniters, that takes 2 iterators and returns a combined iterator.

print sum(joiniters([1, 2, 3], [4, 5, 6]))
for a in joiniters([1, 2, 3], "hello"):
print a

print list(joiniters(iter([1, 2]), iter([3, 4])))

Problem: Write a function iterappend, that takes 2 arguments, an iterator and a value and return a new iterator containing all the elements of the given iterator and the given value.

>>> list(iterappend([1, 2], 3))
[1, 2, 3]
In [67]:
# Solution to joiniters
def joiniters(x, y):
for a in x:
yield a
for b in y:
yield b

# solution to iterappend
def iterappend(x, end):
return joiniters(x, [end])

print sum(iterappend([1, 2], 3))


## Quick Introduction to List Comprehensions¶

In [68]:
x = range(10)

In [71]:
[a*a for a in x]

Out[71]:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [72]:
[a*a for a in x if a % 2 == 0]

Out[72]:
[0, 4, 16, 36, 64]
In [73]:
%%file square.py
def square(x):
return x*x
def cube(x):
return x*x*x

Writing square.py

In [74]:
# Find all line containing function definations
[line for line in open("square.py") if line.startswith("def")]

Out[74]:
['def square(x):\n', 'def cube(x):\n']
In [75]:
# fine all function names

In [78]:
[line.split("(")[0][len("def "):] for line in open("square.py")
if line.startswith("def")]

Out[78]:
['square', 'cube']
In [79]:
%%file a.csv
a,b,c
1,2,3
1,4,9
1,8,27

Writing a.csv

In [83]:
[line.strip("\n").split(",") for line in open("a.csv")]

Out[83]:
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '4', '9'], ['1', '8', '27']]
In [84]:
def squares(values):
return [x*x for x in values]
print sum(squares(xrange(1000000)))

333332833333500000


## Generator Expressions¶

In [85]:
squares_list = [x*x for x in xrange(1000000)]

In [86]:
squares_gen = (x*x for x in xrange(1000000))

In [87]:
squares_gen

Out[87]:
<generator object <genexpr> at 0x10270f460>
In [89]:
sum(squares_gen)

Out[89]:
333332833333500000
In [91]:
sum((x*x for x in xrange(1000000)))

Out[91]:
333332833333500000
In [92]:
sum(x*x for x in xrange(1000000))

Out[92]:
333332833333500000

Problem Write function squares that takes a iterable over numbers as argument and returns an iterator over their squares. Use generator expressions for doing this.

print sum(squares(xrange(1000)))

In [93]:
def grep(pattern, fileobj):
return (line for line in fileobj if pattern in line)

In [94]:
def printlines(lines):
for line in lines:
print line.strip("\n")

In [100]:
fileobj = open("square.py")
lines = grep("def", fileobj)
printlines(lines)

def square(x):
def cube(x):


Lets try to make the program search in multiple files instead of just single one.

In [101]:
%%file hello.py
def hello(name):
print "hello", name

Writing hello.py

In [105]:
def joiniters(x, y):
for a in x: yield a
for b in y: yield b

fileobj1 = open("square.py")
fileobj2 = open("hello.py")
lines = joiniters(fileobj1, fileobj2)
lines = grep("def", lines)
printlines(lines)

def square(x):
def cube(x):
def hello(name):


Lets extract that into useful function.

In [106]:
def readfiles(filenames):
"""Reads all files and returns iterator over lines."""
for filename in filenames:
for line in open(filename):
yield line

In [107]:
lines = readfiles(["square.py", "hello.py"])
lines = grep("def", lines)
printlines(lines)

def square(x):
def cube(x):
def hello(name):


Lets say some files are compressed using gzip and we want our program to read them as well.

In [114]:
!gzip hello.py

gzip: hello.py: No such file or directory

In [115]:
!ls *.gz

hello.py.gz

In [113]:
import gzip
def xopen(filename):
if filename.endswith(".gz"):
return gzip.open(filename)
else:
return open(filename)

"""Reads all files and returns iterator over lines."""
for filename in filenames:
for line in xopen(filename):
yield line

lines = grep("def", lines)
printlines(lines)

def square(x):
def cube(x):
def hello(name):


Problem: Write a function countiter to count number of elements in an iterator.

>>> countiter(xrange(100))
100
>>> countiter(x for x in xrange(100) if x % 2 == 0)
50

Problem: Write a function to linecount to count number of lines in a given file.

print linecount("square.py")

Problem: Write a function wordcount to count number of words in a file.

print wordcount("square.py")
In [112]:
# countiter solution
def countiter(it):
count = 0
for x in it:
count += 1
return count

def countiter(it):
return sum(1 for x in it)


#### The itertools module¶

In [116]:
import itertools

print list(itertools.chain([1, 2, 3, 4], [5, 6]))

[1, 2, 3, 4, 5, 6]

In [117]:
for a, b in itertools.izip("hello", "world"):
print a, b

h w
e o
l r
l l
o d


Problem: Implement izip function.

In [118]:
x = itertools.izip("hello", "world")
print x.next()

('h', 'w')


Problem: Implement a function numbers that generate an infinite sequence of numbers starting from 0.

>>> n = numbers()
>>> n.next()
0
>>> n.next()
1
>>> n.next()
2    
In [121]:
# solution to izip

def izip(x, y):
x = iter(x)
y = iter(y)
while True:
yield x.next(), y.next()

for a, b in izip([1, 2, 3], "hello"):
print a, b

1 h
2 e
3 l

In [122]:
for i, c in enumerate("hello"):
print i, c

0 h
1 e
2 l
3 l
4 o

In [124]:
def myenumerate(it):
return izip(numbers(), it)

def numbers():
i = 0
while True:
yield i
i += 1

for i, c in myenumerate("hello"):
print i, c

0 h
1 e
2 l
3 l
4 o


# Functional Programming¶

## Recursion¶

In [131]:
def exp(x, n):
print "exp", x, n
if n == 0:
return 1
else:
return x * exp(x, n-1)

print exp(2, 10)

exp 2 10
exp 2 9
exp 2 8
exp 2 7
exp 2 6
exp 2 5
exp 2 4
exp 2 3
exp 2 2
exp 2 1
exp 2 0
1024

In [135]:
def fast_exp(x, n):
print "fast_exp", x, n
if n == 0:
return 1
elif n % 2 == 0:
return fast_exp(x*x, n/2)
else:
return x * fast_exp(x, n-1)

print fast_exp(2, 100)

fast_exp 2 100
fast_exp 4 50
fast_exp 16 25
fast_exp 16 24
fast_exp 256 12
fast_exp 65536 6
fast_exp 4294967296 3
fast_exp 4294967296 2
fast_exp 18446744073709551616 1
fast_exp 18446744073709551616 0
1267650600228229401496703205376


Product: Write a function product to compute product of 2 numbers, using + and - operators only.

Example: Flatten list

In [141]:
def flatten_list(x, result=None):
"""Flattens a nested list.

>>> flatten_list([[1, 2], [3, 4, [5]]])
[1, 2, 3, 4, 5]
"""
if result is None:
result = []

for a in x:
if isinstance(a, list):
flatten_list(a, result)
else:
result.append(a)
return result

print flatten_list([1, 2, 3])
print flatten_list([[1, 2], [3, 4, [5]]])

[1, 2, 3]
[1, 2, 3, 4, 5]


Problem: Write a function flatten_dict to flatten a nested dictionary by joining the keys with . character.

>>> flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3}, 'c': 4})
{'a': 1, 'b.x': 2, 'b.y': 3, 'c': 4}
In [148]:
def flatten_dict(d, result=None, prefix=None):
if result is None:
result = {}

for k, v in d.items():
if prefix is None:
key = k
else:
key = prefix + "." + k
if isinstance(v, dict):
flatten_dict(v, result, prefix=key)
else:
result[key] = v
return result

flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3, 'z': {'p': 5}}, 'c': 4})

Out[148]:
{'a': 1, 'b.x': 2, 'b.y': 3, 'b.z.p': 5, 'c': 4}

#### Example: JSON Encode¶

In [151]:
def json_encode(data):
if isinstance(data, bool):
if data:
return "true"
else:
return "false"
elif isinstance(data, (int, float)):
return str(data)
elif isinstance(data, str):
return '"' + data + '"'
elif isinstance(data, list):
elements = [json_encode(d) for d in data]
values = ", ".join(elements)
return "[" + values + "]"

print json_encode(True)
print json_encode(1.234)
print json_encode([1, 2, 3, True, "hello", [3, 4]])
print json_encode({"a": [1, True], "b": {"name": "hello"}})
# {"a": [1, true], "b": {"name": "hello"}}

true
1.234
[1, 2, 3, true, "hello", [3, 4]]


### Higher Order Functions¶

#### Example: Tracing Function Calls¶

In [187]:
indent = 0
def trace(f):
def g(n):
global indent
print "| " * indent + "|-- " + f.__name__, n
indent += 1
value = f(n)
indent -= 1
return value
return g

def memoize(f):
cache = {}
def g(n):
if n not in cache:
cache[n] = f(n)
return cache[n]
return g

import time
#fib = trace(fib)
#fib = memoize(fib)

@memoize
@trace
def fib(n):
if n == 0 or n == 1:
return 1
else:
return fib(n-1) + fib(n-2)

t0 = time.time()
print fib(5)
t1 = time.time()
print "took %f seconds" % (t1-t0)

|-- fib 5
| |-- fib 4
| | |-- fib 3
| | | |-- fib 2
| | | | |-- fib 1
| | | | |-- fib 0
8
took 0.000574 seconds

In [ ]:
def profile(f):
def g():
...
return g

def timepass():
for i in range(100000):
for j in range(100):
x = i*j

timepass = profile(timepass)
timepass()