Advanced Python Workshop - Lecture Notes

May 24, 2013

Lets start with warm up problems. http://anandology.com/apy/slides/python-warmup.html

In [2]:
# problem 1
x = 1
y = x
x = 2
print x, y
2 1
In [1]:
# problem 2
x = [1, 2]
y = [x, 5]
x.append(3)
print y
[[1, 2, 3], 5]

Functions

In [3]:
def square(x): 
    return x*x
print square(4)
16
In [4]:
f = square
print f(4)
16
In [5]:
def fxy(f, x, y):
    return f(x) + f(y)

print fxy(square, 3, 4)
25
In [6]:
f = lambda x: x*x
print f(3)
9
In [7]:
print fxy(lambda x: x*x*x, 3, 4)
91
In [8]:
x = ['python', 'perl', 
     'java', 'c', 
     'haskell', 'ruby']
print sorted(x)
['c', 'haskell', 'java', 'perl', 'python', 'ruby']
In [9]:
print sorted(x, key=len)
['c', 'perl', 'java', 'ruby', 'python', 'haskell']

Default Arguments

In [10]:
def inc(x, amount=1):
    return x+amount

print inc(5)
6
In [11]:
print inc(5, 4)
9
In [13]:
print inc(x=5, amount=4)
9

Lets find out when is default value computed.

In [15]:
def f(x):
    print "f is called with", x
    return x

print "before defining inc"

def inc(x, amount=f(1)):
    return x + amount

print "after defining inc"
print inc(5)
before defining inc
f is called with 1
after defining inc
6

Iterators and Generators

In [17]:
for a in [1, 2, 3, 4]: 
    print a
1
2
3
4
In [18]:
for a in (1, 2, 3, 4):
    print a
1
2
3
4
In [19]:
for k in {"a": 1, "b": 2}: 
    print k
a
b
In [20]:
for c in "hello":
    print c
h
e
l
l
o
In [21]:
",".join(["a", "b", "c"])
Out[21]:
'a,b,c'
In [22]:
",".join({"a": 1, "b": 2})
Out[22]:
'a,b'
In [23]:
",".join("hello")
Out[23]:
'h,e,l,l,o'
In [24]:
max([1, 2, 3, 4])
Out[24]:
4
In [25]:
max("hello")
Out[25]:
'o'
In [26]:
max({"a": 1, "b": 2})
Out[26]:
'b'

Lets try to understand how iteration works.

In [27]:
x = iter([1, 2, 3, 4])
In [29]:
x.next()
Out[29]:
1
In [30]:
x.next()
Out[30]:
2
In [31]:
x.next()
Out[31]:
3
In [32]:
x.next()
Out[32]:
4
In [33]:
x.next()
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-33-e05f366da090> in <module>()
----> 1 x.next()

StopIteration: 
In [34]:
x = iter("abc")
x.next()
Out[34]:
'a'
In [35]:
x.next()
Out[35]:
'b'
In [36]:
x.next()
Out[36]:
'c'
In [37]:
x.next()
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-37-e05f366da090> in <module>()
----> 1 x.next()

StopIteration: 
In [41]:
class yrange:
    def __init__(self, n):
        self.i = 0
        self.n = n
        
    def __iter__(self):
        return self
        
    def next(self):
        i = self.i
        if i < self.n:
            self.i = i + 1
            return i
        else:
            raise StopIteration()
            
y = yrange(5)
for a in y:
    print a
0
1
2
3
4

Lets try to see how for loop is behind the scenes.

for a in x:
    print a

Translate this in to while loop.

it = iter(x)
while True:
    try:
        a = it.next()
    except StopIteration:
        break
    print a
In [44]:
# [1, 2, 3, 4] is an iterable object.
# x = iter([1, 2, 3, 4]) gives an iterator.
# next() method can be called on an iterator.

y = yrange(5)
print list(y)
print list(y)
[0, 1, 2, 3, 4]
[]
In [46]:
class zrange:
    def __init__(self, n):
        self.n = n
    def __iter__(self):
        return yrange(self.n)
    
z = zrange(5)
print list(z)
print list(z)
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]

Generators

In [50]:
def yrange(n):
    i = 0
    while i < n:
        yield i
        i += 1

y = yrange(3)
print y.next()
0
In [51]:
y.next()
Out[51]:
1
In [52]:
y.next()
Out[52]:
2
In [53]:
y.next()
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-53-75a92ee8313a> in <module>()
----> 1 y.next()

StopIteration: 
In [55]:
def f():
    print "begin f"
    yield 1
    print "after yielding 1"
    yield 2
    print "end"
    
a = f()
print a
<generator object f at 0x10270f050>
In [56]:
a.next()
begin f
Out[56]:
1
In [57]:
a.next()
after yielding 1
Out[57]:
2
In [58]:
a.next()
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-58-aa817a57a973> in <module>()
----> 1 a.next()

StopIteration: 
end
In [59]:
max(yrange(4))
Out[59]:
3
In [60]:
sum(yrange(4))
Out[60]:
6
In [61]:
def squares(numbers):
    for n in numbers:
        yield n*n
        
print sum(squares(xrange(1000000)))
333332833333500000
In [62]:
%%file a.txt
1
2
3
4
5
Writing a.txt
In [63]:
def toint(strings):
    for s in strings:
        yield int(s)
        
print sum(toint(open("a.txt")))
15
In [64]:
print sum(squares(toint(open("a.txt"))))
55
In [66]:
# the regular way is
result = 0
for line in open("a.txt"):
    n = int(line)
    result += n
print result
15

Problem: Write a function joiniters, that takes 2 iterators and returns a combined iterator.

print sum(joiniters([1, 2, 3], [4, 5, 6]))
for a in joiniters([1, 2, 3], "hello"):
    print a

print list(joiniters(iter([1, 2]), iter([3, 4])))

Problem: Write a function iterappend, that takes 2 arguments, an iterator and a value and return a new iterator containing all the elements of the given iterator and the given value.

>>> list(iterappend([1, 2], 3))
[1, 2, 3]
In [67]:
# Solution to joiniters
def joiniters(x, y):
    for a in x:
        yield a
    for b in y:
        yield b
        
# solution to iterappend
def iterappend(x, end):
    return joiniters(x, [end])

print sum(iterappend([1, 2], 3))

Quick Introduction to List Comprehensions

In [68]:
x = range(10)
In [71]:
[a*a for a in x]
Out[71]:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [72]:
[a*a for a in x if a % 2 == 0]
Out[72]:
[0, 4, 16, 36, 64]
In [73]:
%%file square.py
def square(x):
    return x*x
def cube(x):
    return x*x*x
Writing square.py
In [74]:
# Find all line containing function definations
[line for line in open("square.py") if line.startswith("def")]
Out[74]:
['def square(x):\n', 'def cube(x):\n']
In [75]:
# fine all function names
In [78]:
[line.split("(")[0][len("def "):] for line in open("square.py") 
                    if line.startswith("def")]
Out[78]:
['square', 'cube']
In [79]:
%%file a.csv
a,b,c
1,2,3
1,4,9
1,8,27
Writing a.csv
In [83]:
[line.strip("\n").split(",") for line in open("a.csv")]
Out[83]:
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '4', '9'], ['1', '8', '27']]
In [84]:
def squares(values):
    return [x*x for x in values]
print sum(squares(xrange(1000000)))
333332833333500000

Generator Expressions

In [85]:
squares_list = [x*x for x in xrange(1000000)]
In [86]:
squares_gen = (x*x for x in xrange(1000000))
In [87]:
squares_gen
Out[87]:
<generator object <genexpr> at 0x10270f460>
In [89]:
sum(squares_gen)
Out[89]:
333332833333500000
In [91]:
sum((x*x for x in xrange(1000000)))
Out[91]:
333332833333500000
In [92]:
sum(x*x for x in xrange(1000000))
Out[92]:
333332833333500000

Problem Write function squares that takes a iterable over numbers as argument and returns an iterator over their squares. Use generator expressions for doing this.

print sum(squares(xrange(1000)))

Example: Reading multiple files

In [93]:
def grep(pattern, fileobj):
    return (line for line in fileobj if pattern in line)
In [94]:
def printlines(lines):
    for line in lines:
        print line.strip("\n")
In [100]:
fileobj = open("square.py")
lines = grep("def", fileobj)
printlines(lines)
def square(x):
def cube(x):

Lets try to make the program search in multiple files instead of just single one.

In [101]:
%%file hello.py
def hello(name):
    print "hello", name
Writing hello.py
In [105]:
def joiniters(x, y):
    for a in x: yield a
    for b in y: yield b
        
fileobj1 = open("square.py")
fileobj2 = open("hello.py")
lines = joiniters(fileobj1, fileobj2)
lines = grep("def", lines)
printlines(lines)
def square(x):
def cube(x):
def hello(name):

Lets extract that into useful function.

In [106]:
def readfiles(filenames):
    """Reads all files and returns iterator over lines."""
    for filename in filenames:
        for line in open(filename):
            yield line
In [107]:
lines = readfiles(["square.py", "hello.py"])
lines = grep("def", lines)
printlines(lines)
def square(x):
def cube(x):
def hello(name):

Lets say some files are compressed using gzip and we want our program to read them as well.

In [114]:
!gzip hello.py
gzip: hello.py: No such file or directory
In [115]:
!ls *.gz
hello.py.gz
In [113]:
import gzip
def xopen(filename):
    if filename.endswith(".gz"):
        return gzip.open(filename)
    else:
        return open(filename)
    
def readfiles(filenames):
    """Reads all files and returns iterator over lines."""
    for filename in filenames:
        for line in xopen(filename):
            yield line
            
lines = readfiles(["square.py", "hello.py.gz"])
lines = grep("def", lines)
printlines(lines)
def square(x):
def cube(x):
def hello(name):

Problem: Write a function countiter to count number of elements in an iterator.

>>> countiter(xrange(100))
100
>>> countiter(x for x in xrange(100) if x % 2 == 0)
50

Problem: Write a function to linecount to count number of lines in a given file.

print linecount("square.py")

Problem: Write a function wordcount to count number of words in a file.

print wordcount("square.py")
In [112]:
# countiter solution
def countiter(it):
    count = 0
    for x in it:
        count += 1
    return count

def countiter(it):
    return sum(1 for x in it)

The itertools module

In [116]:
import itertools

print list(itertools.chain([1, 2, 3, 4], [5, 6]))
[1, 2, 3, 4, 5, 6]
In [117]:
for a, b in itertools.izip("hello", "world"):
    print a, b
h w
e o
l r
l l
o d

Problem: Implement izip function.

In [118]:
x = itertools.izip("hello", "world")
print x.next()
('h', 'w')

Problem: Implement a function numbers that generate an infinite sequence of numbers starting from 0.

>>> n = numbers()
>>> n.next()
0
>>> n.next()
1
>>> n.next()
2    
In [121]:
# solution to izip

def izip(x, y):
    x = iter(x)
    y = iter(y)
    while True:
        yield x.next(), y.next()
    
for a, b in izip([1, 2, 3], "hello"):
    print a, b
1 h
2 e
3 l
In [122]:
for i, c in enumerate("hello"):
    print i, c
0 h
1 e
2 l
3 l
4 o
In [124]:
def myenumerate(it):
    return izip(numbers(), it)

def numbers():
    i = 0
    while True:
        yield i
        i += 1

for i, c in myenumerate("hello"):
    print i, c
0 h
1 e
2 l
3 l
4 o

Functional Programming

Recursion

In [131]:
def exp(x, n):
    print "exp", x, n
    if n == 0:
        return 1
    else:
        return x * exp(x, n-1)
    
print exp(2, 10)
exp 2 10
exp 2 9
exp 2 8
exp 2 7
exp 2 6
exp 2 5
exp 2 4
exp 2 3
exp 2 2
exp 2 1
exp 2 0
1024
In [135]:
def fast_exp(x, n):
    print "fast_exp", x, n
    if n == 0:
        return 1
    elif n % 2 == 0:
        return fast_exp(x*x, n/2)
    else:
        return x * fast_exp(x, n-1)

print fast_exp(2, 100)
fast_exp 2 100
fast_exp 4 50
fast_exp 16 25
fast_exp 16 24
fast_exp 256 12
fast_exp 65536 6
fast_exp 4294967296 3
fast_exp 4294967296 2
fast_exp 18446744073709551616 1
fast_exp 18446744073709551616 0
1267650600228229401496703205376

Product: Write a function product to compute product of 2 numbers, using + and - operators only.

Example: Flatten list

In [141]:
def flatten_list(x, result=None):
    """Flattens a nested list.

        >>> flatten_list([[1, 2], [3, 4, [5]]])
        [1, 2, 3, 4, 5]
    """
    if result is None:
        result = []
        
    for a in x:
        if isinstance(a, list):
            flatten_list(a, result)
        else:
            result.append(a)
    return result

print flatten_list([1, 2, 3])
print flatten_list([[1, 2], [3, 4, [5]]])
[1, 2, 3]
[1, 2, 3, 4, 5]

Problem: Write a function flatten_dict to flatten a nested dictionary by joining the keys with . character.

>>> flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3}, 'c': 4})
{'a': 1, 'b.x': 2, 'b.y': 3, 'c': 4}
In [148]:
def flatten_dict(d, result=None, prefix=None):
    if result is None:
        result = {}
    
    for k, v in d.items():
        if prefix is None:
            key = k
        else:
            key = prefix + "." + k
        if isinstance(v, dict):
            flatten_dict(v, result, prefix=key)
        else:
            result[key] = v
    return result

flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3, 'z': {'p': 5}}, 'c': 4})
Out[148]:
{'a': 1, 'b.x': 2, 'b.y': 3, 'b.z.p': 5, 'c': 4}

Example: JSON Encode

In [151]:
def json_encode(data):
    if isinstance(data, bool):
        if data:
            return "true"
        else:
            return "false"
    elif isinstance(data, (int, float)):
        return str(data)
    elif isinstance(data, str):
        return '"' + data + '"'
    elif isinstance(data, list):
        elements = [json_encode(d) for d in data]
        values = ", ".join(elements)
        return "[" + values + "]"
        
print json_encode(True)
print json_encode(1.234)
print json_encode([1, 2, 3, True, "hello", [3, 4]])
print json_encode({"a": [1, True], "b": {"name": "hello"}})
# {"a": [1, true], "b": {"name": "hello"}}
true
1.234
[1, 2, 3, true, "hello", [3, 4]]

Higher Order Functions

Example: Tracing Function Calls

In [187]:
indent = 0
def trace(f):
    def g(n):
        global indent
        print "| " * indent + "|-- " + f.__name__, n
        indent += 1
        value = f(n)
        indent -= 1
        return value
    return g

def memoize(f):
    cache = {}
    def g(n):
        if n not in cache:
            cache[n] = f(n)
        return cache[n]
    return g

import time
#fib = trace(fib)
#fib = memoize(fib)

@memoize
@trace
def fib(n):
    if n == 0 or n == 1:
        return 1
    else:
        return fib(n-1) + fib(n-2)

t0 = time.time()
print fib(5)
t1 = time.time()
print "took %f seconds" % (t1-t0)
|-- fib 5
| |-- fib 4
| | |-- fib 3
| | | |-- fib 2
| | | | |-- fib 1
| | | | |-- fib 0
8
took 0.000574 seconds
In [ ]:
def profile(f):
    def g():
        ...
    return g
    
def timepass():
    for i in range(100000):
        for j in range(100):
            x = i*j

timepass = profile(timepass)
timepass()