May 24, 2013
Let's start with some warm-up problems: http://anandology.com/apy/slides/python-warmup.html
# problem 1
x = 1
y = x
x = 2
print x, y
2 1
# problem 2
x = [1, 2]
y = [x, 5]
x.append(3)
print y
[[1, 2, 3], 5]
def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product
print square(4)
16
f = square
print f(4)
16
def fxy(f, x, y):
    """Apply ``f`` to ``x`` and then to ``y``, and return the sum of both results."""
    first = f(x)
    second = f(y)
    return first + second
print fxy(square, 3, 4)
25
f = lambda x: x*x
print f(3)
9
print fxy(lambda x: x*x*x, 3, 4)
91
x = ['python', 'perl',
'java', 'c',
'haskell', 'ruby']
print sorted(x)
['c', 'haskell', 'java', 'perl', 'python', 'ruby']
print sorted(x, key=len)
['c', 'perl', 'java', 'ruby', 'python', 'haskell']
Default Arguments
def inc(x, amount=1):
    """Return ``x`` increased by ``amount`` (1 when ``amount`` is omitted)."""
    total = x + amount
    return total
print inc(5)
6
print inc(5, 4)
9
print inc(x=5, amount=4)
9
Let's find out when the default value is computed.
def f(x):
    """Announce the call on stdout, then hand ``x`` back unchanged."""
    # One pre-joined string prints the same text as the two-item print statement.
    message = " ".join(["f is called with", str(x)])
    print(message)
    return x
print "before defining inc"
def inc(x, amount=f(1)):
    """Return ``x + amount``.

    The default for ``amount`` is the value of the ``f(1)`` expression written
    in the signature.
    """
    total = x + amount
    return total
print "after defining inc"
print inc(5)
before defining inc f is called with 1 after defining inc 6
for a in [1, 2, 3, 4]:
print a
1 2 3 4
for a in (1, 2, 3, 4):
print a
1 2 3 4
for k in {"a": 1, "b": 2}:
print k
a b
for c in "hello":
print c
h e l l o
",".join(["a", "b", "c"])
'a,b,c'
",".join({"a": 1, "b": 2})
'a,b'
",".join("hello")
'h,e,l,l,o'
max([1, 2, 3, 4])
4
max("hello")
'o'
max({"a": 1, "b": 2})
'b'
Let's try to understand how iteration works.
x = iter([1, 2, 3, 4])
x.next()
1
x.next()
2
x.next()
3
x.next()
4
x.next()
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) <ipython-input-33-e05f366da090> in <module>() ----> 1 x.next() StopIteration:
x = iter("abc")
x.next()
'a'
x.next()
'b'
x.next()
'c'
x.next()
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) <ipython-input-37-e05f366da090> in <module>() ----> 1 x.next() StopIteration:
class yrange(object):
    """Single-pass iterator over the integers ``0, 1, ..., n - 1``.

    The object is its own iterator, so once it has been consumed a second
    pass yields nothing.
    """

    def __init__(self, n):
        self.i = 0  # next value to hand out
        self.n = n  # exclusive upper bound

    def __iter__(self):
        return self

    def next(self):
        """Return the next integer, or raise ``StopIteration`` when done."""
        current = self.i
        if current >= self.n:
            raise StopIteration()
        self.i = current + 1
        return current

    # Python 3 spells the iterator method ``__next__``; expose both names so
    # ``for`` loops and ``next()`` work under either protocol.
    __next__ = next
y = yrange(5)
for a in y:
print a
0 1 2 3 4
Let's see how a for loop works behind the scenes.
for a in x:
print a
Translate this into a while loop.
it = iter(x)
while True:
try:
a = it.next()
except StopIteration:
break
print a
# [1, 2, 3, 4] is an iterable object.
# x = iter([1, 2, 3, 4]) gives an iterator.
# next() method can be called on an iterator.
y = yrange(5)
print list(y)
print list(y)
[0, 1, 2, 3, 4] []
class zrange:
    """Iterable whose ``__iter__`` builds a new ``yrange(n)`` on every call."""

    def __init__(self, n):
        # Only the bound is stored here; iteration state lives in the iterators.
        self.n = n

    def __iter__(self):
        fresh = yrange(self.n)
        return fresh
z = zrange(5)
print list(z)
print list(z)
[0, 1, 2, 3, 4] [0, 1, 2, 3, 4]
def yrange(n):
    """Generate 0, 1, 2, ... for as long as the value is below ``n``."""
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1
y = yrange(3)
print y.next()
0
y.next()
1
y.next()
2
y.next()
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) <ipython-input-53-75a92ee8313a> in <module>() ----> 1 y.next() StopIteration:
def f():
    """Generator that yields 1 and then 2, printing a marker around each step.

    Nothing is printed until the generator is first advanced.
    """
    print("begin f")
    yield 1
    # Execution resumes here on the second advance.
    print("after yielding 1")
    yield 2
    # Reached only when the consumer asks for a third value.
    print("end")
a = f()
print a
<generator object f at 0x10270f050>
a.next()
begin f
1
a.next()
after yielding 1
2
a.next()
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) <ipython-input-58-aa817a57a973> in <module>() ----> 1 a.next() StopIteration:
end
max(yrange(4))
3
sum(yrange(4))
6
def squares(numbers):
    """Lazily yield the square of every item of ``numbers``, in order."""
    for value in numbers:
        squared = value * value
        yield squared
print sum(squares(xrange(1000000)))
333332833333500000
%%file a.txt
1
2
3
4
5
Writing a.txt
def toint(strings):
    """Lazily yield ``int(s)`` for every item ``s`` of ``strings``."""
    for text in strings:
        number = int(text)
        yield number
print sum(toint(open("a.txt")))
15
print sum(squares(toint(open("a.txt"))))
55
# the regular way is
result = 0
for line in open("a.txt"):
n = int(line)
result += n
print result
15
Problem: Write a function `joiniters` that takes two iterators and returns a combined iterator.
print sum(joiniters([1, 2, 3], [4, 5, 6]))
for a in joiniters([1, 2, 3], "hello"):
print a
print list(joiniters(iter([1, 2]), iter([3, 4])))
Problem: Write a function `iterappend` that takes two arguments, an iterator and a value, and returns a new iterator containing all the elements of the given iterator followed by the given value.
>>> list(iterappend([1, 2], 3))
[1, 2, 3]
# Solution to joiniters
def joiniters(x, y):
    """Yield every item of ``x`` and then every item of ``y``."""
    for source in (x, y):
        for item in source:
            yield item
# solution to iterappend
def iterappend(x, end):
    """Return an iterator over the items of ``x`` followed by ``end``."""
    # Wrap the single value in a list so it can be chained like any iterable.
    tail = [end]
    return joiniters(x, tail)
print sum(iterappend([1, 2], 3))
x = range(10)
[a*a for a in x]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
[a*a for a in x if a % 2 == 0]
[0, 4, 16, 36, 64]
%%file square.py
def square(x):
return x*x
def cube(x):
return x*x*x
Writing square.py
# Find all lines containing function definitions
[line for line in open("square.py") if line.startswith("def")]
['def square(x):\n', 'def cube(x):\n']
# Find all function names
[line.split("(")[0][len("def "):] for line in open("square.py")
if line.startswith("def")]
['square', 'cube']
%%file a.csv
a,b,c
1,2,3
1,4,9
1,8,27
Writing a.csv
[line.strip("\n").split(",") for line in open("a.csv")]
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '4', '9'], ['1', '8', '27']]
def squares(values):
    """Return a list holding the square of every item of ``values``."""
    squared = []
    for value in values:
        squared.append(value * value)
    return squared
print sum(squares(xrange(1000000)))
333332833333500000
squares_list = [x*x for x in xrange(1000000)]
squares_gen = (x*x for x in xrange(1000000))
squares_gen
<generator object <genexpr> at 0x10270f460>
sum(squares_gen)
333332833333500000
sum((x*x for x in xrange(1000000)))
333332833333500000
sum(x*x for x in xrange(1000000))
333332833333500000
Problem: Write a function `squares` that takes an iterable of numbers as its argument and returns an iterator over their squares. Use a generator expression to do this.
print sum(squares(xrange(1000)))
def grep(pattern, fileobj):
    """Return a lazy iterator over the lines of ``fileobj`` that contain ``pattern``.

    The match is a plain substring test (``pattern in line``).
    """
    matching = (text for text in fileobj if pattern in text)
    return matching
def printlines(lines):
    """Print each item of ``lines`` with leading/trailing newline characters removed."""
    for raw in lines:
        text = raw.strip("\n")
        print(text)
fileobj = open("square.py")
lines = grep("def", fileobj)
printlines(lines)
def square(x): def cube(x):
Let's make the program search in multiple files instead of just a single one.
%%file hello.py
def hello(name):
print "hello", name
Writing hello.py
def joiniters(x, y):
    """Chain two iterables: all items of ``x`` first, then all items of ``y``."""
    for head_item in x:
        yield head_item
    # ``y`` is only touched once ``x`` has been fully consumed.
    for tail_item in y:
        yield tail_item
fileobj1 = open("square.py")
fileobj2 = open("hello.py")
lines = joiniters(fileobj1, fileobj2)
lines = grep("def", lines)
printlines(lines)
def square(x): def cube(x): def hello(name):
Let's extract that into a useful function.
def readfiles(filenames):
    """Read all the given files and yield their lines one by one.

    Files are opened lazily, one at a time and in the order given, and each
    one is closed as soon as its last line has been yielded (or when the
    generator itself is closed) instead of being left open.

    :param filenames: iterable of paths to open with ``open()``.
    :returns: a generator over the lines, newline characters included.
    """
    for filename in filenames:
        with open(filename) as fileobj:
            for line in fileobj:
                yield line
lines = readfiles(["square.py", "hello.py"])
lines = grep("def", lines)
printlines(lines)
def square(x): def cube(x): def hello(name):
Let's say some files are compressed using gzip and we want our program to read them as well.
!gzip hello.py
gzip: hello.py: No such file or directory
!ls *.gz
hello.py.gz
import gzip
def xopen(filename):
    """Open ``filename``, using ``gzip.open`` when the name ends in ``.gz``.

    Any other name is opened with the built-in ``open``. Both are called with
    the filename as their only argument.
    """
    opener = gzip.open if filename.endswith(".gz") else open
    return opener(filename)
def readfiles(filenames):
    """Read all the given files and yield their lines one by one.

    Each file is opened through ``xopen`` only when iteration reaches it, and
    is closed as soon as its last line has been yielded (or when the generator
    itself is closed) instead of being left open.

    :param filenames: iterable of paths accepted by ``xopen``.
    :returns: a generator over the lines, in file order.
    """
    for filename in filenames:
        with xopen(filename) as fileobj:
            for line in fileobj:
                yield line
lines = readfiles(["square.py", "hello.py.gz"])
lines = grep("def", lines)
printlines(lines)
def square(x): def cube(x): def hello(name):
Problem: Write a function `countiter` to count the number of elements in an iterator.
>>> countiter(xrange(100))
100
>>> countiter(x for x in xrange(100) if x % 2 == 0)
50
Problem: Write a function `linecount` to count the number of lines in a given file.
print linecount("square.py")
Problem: Write a function `wordcount` to count the number of words in a file.
print wordcount("square.py")
# countiter solution
def countiter(it):
    """Consume ``it`` and return how many items it produced."""
    total = 0
    # Numbering from 1 leaves ``total`` equal to the item count after the loop;
    # it stays 0 when ``it`` is empty.
    for total, _ in enumerate(it, 1):
        pass
    return total
def countiter(it):
    """Consume ``it`` and return the number of items it yielded."""
    count = 0
    for _ in it:
        count = count + 1
    return count
import itertools
print list(itertools.chain([1, 2, 3, 4], [5, 6]))
[1, 2, 3, 4, 5, 6]
for a, b in itertools.izip("hello", "world"):
print a, b
h w e o l r l l o d
Problem: Implement the `izip` function.
x = itertools.izip("hello", "world")
print x.next()
('h', 'w')
Problem: Implement a function `numbers` that generates an infinite sequence of numbers starting from 0.
>>> n = numbers()
>>> n.next()
0
>>> n.next()
1
>>> n.next()
2
# solution to izip
def izip(x, y):
    """Lazily pair up the items of ``x`` and ``y``.

    Yields ``(a, b)`` tuples and stops as soon as either input runs out. If
    ``x`` still has an item when ``y`` is exhausted, that item has already been
    taken from ``x`` and is dropped.

    :param x: first iterable.
    :param y: second iterable.
    :returns: a generator of 2-tuples.
    """
    x = iter(x)
    y = iter(y)
    while True:
        try:
            # ``x`` is advanced before ``y``, matching the pair order.
            pair = next(x), next(y)
        except StopIteration:
            # End the generator explicitly: a StopIteration escaping a
            # generator body is turned into RuntimeError under PEP 479.
            return
        yield pair
for a, b in izip([1, 2, 3], "hello"):
print a, b
1 h 2 e 3 l
for i, c in enumerate("hello"):
print i, c
0 h 1 e 2 l 3 l 4 o
def myenumerate(it):
    """Pair every item of ``it`` with a counter taken from ``numbers()``."""
    counter = numbers()
    return izip(counter, it)
def numbers():
    """Generate 0, 1, 2, ... without end."""
    current = 0
    while True:
        yield current
        current = current + 1
for i, c in myenumerate("hello"):
print i, c
0 h 1 e 2 l 3 l 4 o
def exp(x, n):
    """Return ``x`` raised to the power ``n`` by repeated multiplication.

    Prints one ``exp x n`` line per call, so the recursion can be followed on
    stdout. Each call recurses with ``n - 1`` until ``n == 0``.
    """
    print(" ".join(["exp", str(x), str(n)]))
    if n == 0:
        return 1
    return x * exp(x, n - 1)
print exp(2, 10)
exp 2 10 exp 2 9 exp 2 8 exp 2 7 exp 2 6 exp 2 5 exp 2 4 exp 2 3 exp 2 2 exp 2 1 exp 2 0 1024
def fast_exp(x, n):
    """Return ``x`` raised to the power ``n`` by repeated squaring.

    An even exponent is halved while the base is squared; an odd exponent is
    reduced by one. Prints one ``fast_exp x n`` line per call so the recursion
    can be followed on stdout.

    :param x: the base.
    :param n: the exponent; the recursion ends when it reaches 0.
    """
    print(" ".join(["fast_exp", str(x), str(n)]))
    if n == 0:
        return 1
    if n % 2 == 0:
        # Floor division keeps the exponent an integer even where ``/`` means
        # true division; a float exponent loses precision for large ``n``.
        return fast_exp(x * x, n // 2)
    return x * fast_exp(x, n - 1)
print fast_exp(2, 100)
fast_exp 2 100 fast_exp 4 50 fast_exp 16 25 fast_exp 16 24 fast_exp 256 12 fast_exp 65536 6 fast_exp 4294967296 3 fast_exp 4294967296 2 fast_exp 18446744073709551616 1 fast_exp 18446744073709551616 0 1267650600228229401496703205376
Problem: Write a function `product` to compute the product of two numbers using only the `+` and `-` operators.
Example: Flatten list
def flatten_list(x, result=None):
    """Flatten an arbitrarily nested list into a single flat list.

    >>> flatten_list([[1, 2], [3, 4, [5]]])
    [1, 2, 3, 4, 5]

    ``result`` is the accumulator used by the recursive calls. When a list is
    passed in, the items are appended to it and that same list is returned.
    """
    flat = [] if result is None else result
    for item in x:
        if not isinstance(item, list):
            flat.append(item)
            continue
        # Nested list: recurse, sharing the same accumulator.
        flatten_list(item, flat)
    return flat
print flatten_list([1, 2, 3])
print flatten_list([[1, 2], [3, 4, [5]]])
[1, 2, 3] [1, 2, 3, 4, 5]
Problem: Write a function `flatten_dict` to flatten a nested dictionary by joining the keys with the `.` character.
>>> flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3}, 'c': 4})
{'a': 1, 'b.x': 2, 'b.y': 3, 'c': 4}
def flatten_dict(d, result=None, prefix=None):
    """Flatten a nested dictionary, joining nested keys with ``"."``.

    >>> flatten_dict({'a': 1, 'b': {'x': 2}}) == {'a': 1, 'b.x': 2}
    True

    ``result`` (accumulator dict) and ``prefix`` (key path so far) are used by
    the recursive calls. When ``result`` is passed in, entries are added to it
    and that same dict is returned.
    """
    flat = {} if result is None else result
    for name, value in d.items():
        path = name if prefix is None else prefix + "." + name
        if isinstance(value, dict):
            # Descend, carrying the accumulated key path along.
            flatten_dict(value, flat, prefix=path)
            continue
        flat[path] = value
    return flat
flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3, 'z': {'p': 5}}, 'c': 4})
{'a': 1, 'b.x': 2, 'b.y': 3, 'b.z.p': 5, 'c': 4}
def _json_quote(text):
    """Return ``text`` as a double-quoted JSON string literal.

    Escapes the backslash, the double quote, and the newline, carriage-return
    and tab characters. Other control characters are passed through as they are.
    """
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    escaped = escaped.replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t")
    return '"' + escaped + '"'


def json_encode(data):
    """Encode ``data`` as JSON text.

    Supported values: ``None``, ``bool``, ``int``, ``float``, ``str``, ``list``
    and ``dict`` with ``str`` keys, nested to any depth.

    :param data: the value to encode.
    :returns: the JSON text as a string.
    :raises TypeError: for a value of any other type, or a non-string dict key.
    """
    if data is None:
        return "null"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(data, bool):
        return "true" if data else "false"
    if isinstance(data, (int, float)):
        return str(data)
    if isinstance(data, str):
        return _json_quote(data)
    if isinstance(data, list):
        elements = [json_encode(d) for d in data]
        return "[" + ", ".join(elements) + "]"
    if isinstance(data, dict):
        members = []
        for key, value in data.items():
            if not isinstance(key, str):
                raise TypeError("JSON object keys must be strings, got %r" % (key,))
            members.append(_json_quote(key) + ": " + json_encode(value))
        return "{" + ", ".join(members) + "}"
    # Fail loudly rather than silently returning None for unsupported input.
    raise TypeError("cannot JSON-encode value of type %s" % type(data).__name__)
print json_encode(True)
print json_encode(1.234)
print json_encode([1, 2, 3, True, "hello", [3, 4]])
print json_encode({"a": [1, True], "b": {"name": "hello"}})
# {"a": [1, true], "b": {"name": "hello"}}
true 1.234 [1, 2, 3, true, "hello", [3, 4]]
# Current nesting depth of traced calls; shared by every traced function.
indent = 0


def trace(f):
    """Decorator that prints an indented call tree for a one-argument function.

    Each call prints one line made of ``"| "`` repeated once per enclosing
    traced call, then ``"|-- "``, the function name and the argument.

    :param f: function taking a single argument ``n``.
    :returns: a wrapper with the same one-argument signature.
    """
    import functools

    @functools.wraps(f)
    def g(n):
        global indent
        print(" ".join(["| " * indent + "|-- " + f.__name__, str(n)]))
        indent += 1
        try:
            return f(n)
        finally:
            # Restore the depth even if ``f`` raises, so later traces are not
            # printed with a stale indentation.
            indent -= 1
    return g
def memoize(f):
    """Decorator that caches the results of a one-argument function.

    The first call with a given ``n`` runs ``f`` and stores the result; later
    calls with an equal ``n`` return the stored value. The cache is a plain
    dict, so ``n`` must be hashable and entries are never evicted.

    :param f: function taking a single argument ``n``.
    :returns: a caching wrapper that keeps ``f``'s name and docstring.
    """
    import functools

    cache = {}

    @functools.wraps(f)
    def g(n):
        if n not in cache:
            cache[n] = f(n)
        return cache[n]
    return g
import time
#fib = trace(fib)
#fib = memoize(fib)
@memoize
@trace
def fib(n):
    """Return the n-th number of the sequence 1, 1, 2, 3, 5, 8, ...

    ``fib(0)`` and ``fib(1)`` are both 1; every later value is the sum of the
    two before it.
    """
    if n in (0, 1):
        return 1
    return fib(n - 1) + fib(n - 2)
t0 = time.time()
print fib(5)
t1 = time.time()
print "took %f seconds" % (t1-t0)
|-- fib 5 | |-- fib 4 | | |-- fib 3 | | | |-- fib 2 | | | | |-- fib 1 | | | | |-- fib 0 8 took 0.000574 seconds
def profile(f):
    """Decorator that reports how long each call to ``f`` takes.

    The wrapper calls ``f`` with whatever arguments it receives, prints
    ``took <seconds> seconds`` to stdout once ``f`` finishes (also when it
    raises), and returns ``f``'s result.

    :param f: the function to time.
    :returns: the timing wrapper.
    """
    import functools
    import time

    @functools.wraps(f)
    def g(*args, **kwargs):
        t0 = time.time()
        try:
            return f(*args, **kwargs)
        finally:
            t1 = time.time()
            print("took %f seconds" % (t1 - t0))
    return g
def timepass():
    """Keep the CPU busy: multiply every ``i`` in ``range(100000)`` by every
    ``j`` in ``range(100)`` and discard the results. Returns ``None``."""
    for outer in range(100000):
        for inner in range(100):
            product = outer * inner
timepass = profile(timepass)
timepass()