ipython notebook

The Future of Science -- EXTRAS

Cool idea 1: "Duck typing"

If it looks like a duck and quacks like a duck, assume it's a duck.

In [51]:
import numpy as np
import pylab as pl
In [16]:
# you can, but don't have to declare the type 
dave = 5 
print dave, type(dave)
5 <type 'int'>
In [17]:
# dynamically updated -- good/bad
dave = 5.
print dave, type(dave)
5.0 <type 'float'>
In [18]:
dave = "David Lagattuta"
print dave, type(dave)
David Lagattuta <type 'str'>
In [19]:
# a few string examples
print "split example:", dave.split()
print "join example:", '__'.join(dave.split())
split example: ['David', 'Lagattuta']
join example: David__Lagattuta
In [20]:
# Bracket-types tell you about the "thing" it's storing
dave = [2.3, 4.5, 6.9]
print dave, type(dave)
[2.3, 4.5, 6.9] <type 'list'>
In [21]:
dave = [2.3, 4.5, 6.9, "hey there", 4]
print dave, type(dave)
[2.3, 4.5, 6.9, 'hey there', 4] <type 'list'>
In [ ]:
dave = [2.3, 4.5, 6.9, "hey there", [2.1, 7.8], ]
print dave, type(dave)

Cool Idea 2:

It largely does "what you would hope it would do."

In [22]:
eight_int = 8
five_int = 5
print "8/5 =", eight_int / five_int # Integer division
8/5 = 1
In [23]:
five_float = 5.0
print "8/5.0 =", eight_int / five_float # changes everything
8/5.0 = 1.6
In [24]:
blank_list = [] # how to create a blank list

#add list comprehensions
dave = [x**2 for x in xrange(5)]
In [25]:
print dave, type(dave)
[0, 1, 4, 9, 16] <type 'list'>
In [26]:
from math import pi

print [str(round(pi, i)) for i in xrange(2, 9)]
['3.14', '3.142', '3.1416', '3.14159', '3.141593', '3.1415927', '3.14159265']
In [28]:
# Unnecessarily complicated example to show off.
print [(i, str(round(pi, i)), round(pi, i)) for i in xrange(2, 9)]
[(2, '3.14', 3.14), (3, '3.142', 3.142), (4, '3.1416', 3.1416), (5, '3.14159', 3.14159), (6, '3.141593', 3.141593), (7, '3.1415927', 3.1415927), (8, '3.14159265', 3.14159265)]

Tuples and unpacking

In [32]:
tuple_example = ('Jonathan', 'Whitmore', 30, '[email protected]')
print tuple_example
print type(tuple_example)
first_name, last_name, age, email = tuple_example

print last_name, first_name
print age
print email
('Jonathan', 'Whitmore', 30, '[email protected]')
<type 'tuple'>
Whitmore Jonathan
30
[email protected]
In [33]:
list_comp_example = [i**2 for i in xrange(10)]
print list_comp_example
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [34]:
print sum([i**2 for i in xrange(10)])
285
In [35]:
# Generator version!
print sum(i**2 for i in xrange(10))

# TODO  numpy
285

Splat!

In [36]:
# http://stackoverflow.com/questions/3394835/args-and-kwargs
def print_everything(*args):
    """Print every positional argument on its own line, numbered from zero.

    Args:
        *args: any number of objects; each one is rendered through
            ``str.format``, so anything printable is accepted.

    Returns:
        None. The numbered lines are written to standard output.
    """
    for count, thing in enumerate(args):
        # A single parenthesised argument prints identically on Python 2 and 3.
        print('{0}. {1}'.format(count, thing))

print_everything('apple', 'banana', 'cabbage')
0. apple
1. banana
2. cabbage
In [37]:
# Similarly, **kwargs allows you to handle named arguments that you have not defined in advance:

def table_things(title_string, **kwargs):
    """Print a small table: a title, a dashed underline, then the keyword arguments.

    Args:
        title_string: heading text; the underline is made exactly as long.
        **kwargs: arbitrary named values, printed one per line as
            ``name = value``.

    Returns:
        None. Everything is written to standard output.
    """
    print(title_string)
    print("-" * len(title_string))
    # Rows follow the iteration order of the kwargs dict, which is not
    # guaranteed to match the order used in the call.
    for name, value in kwargs.items():
        print('{0} = {1}'.format(name, value))

table_things("Classifying stuff", apple = 'fruit', cabbage = 'vegetable')
Classifying stuff
-----------------
cabbage = vegetable
apple = fruit
In [38]:
dave = "Hi there people! "
print dave.startswith("hi  ")
print dave.splitlines()
print dave.strip()
jw = dave.split()
[x for x in jw if x.startswith('p')]
False
['Hi there people! ']
Hi there people!
Out[38]:
['people!']
In [39]:
# You can import modules/subpackages as whatever name you want to call them.
from scipy import ndimage as dave_sucks

Numpy extras

In [41]:
# numpy is only available under its alias `np` (see the import cell at the top).
print(np.arange(12))
[ 0  1  2  3  4  5  6  7  8  9 10 11]
In [40]:
# `**` is applied element by element; numpy is referenced through its `np` alias.
print(np.arange(12) ** 2)
[  0   1   4   9  16  25  36  49  64  81 100 121]
In [42]:
print type(np.arange(12))
<type 'numpy.ndarray'>
In [43]:
# python
x = range(10000)
%timeit [item + 1 for item in x]
1000 loops, best of 3: 654 us per loop
In [44]:
# numpy
x = np.arange(10000)
%timeit x + 1
100000 loops, best of 3: 11.2 us per loop
In [45]:
print "x      -> ", x  # notice the smart printing
print "x[:]   -> ", x[:]
print "x[0]   -> ", x[0] # first element 
print "x[0:5] -> ", x[0:5] # first 5 elements
print "x[-1]  -> ", x[-1] # last element
x      ->  [   0    1    2 ..., 9997 9998 9999]
x[:]   ->  [   0    1    2 ..., 9997 9998 9999]
x[0]   ->  0
x[0:5] ->  [0 1 2 3 4]
x[-1]  ->  9999
In [46]:
# A bit more complicated slicing: negative indices count back from the end
print x[-5:] # last five elements
print x[-5:-2] # three elements starting fifth-from-last (the final two are excluded)
print x[-5:-1] # four elements starting fifth-from-last (the final value is excluded)
[9995 9996 9997 9998 9999]
[9995 9996 9997]
[9995 9996 9997 9998]
In [47]:
# create evenly spaced arrays
print np.arange(5, 50, step=3, )
print np.linspace(5, 50, num=3, endpoint=True, )
print np.linspace(5, 50, num=3, endpoint=False, )
[ 5  8 11 14 17 20 23 26 29 32 35 38 41 44 47]
[  5.   27.5  50. ]
[  5.  20.  35.]
In [48]:
np.array([[x, x**2, x/2.0] for x in range(10)]) + 5
Out[48]:
array([[  5. ,   5. ,   5. ],
       [  6. ,   6. ,   5.5],
       [  7. ,   9. ,   6. ],
       [  8. ,  14. ,   6.5],
       [  9. ,  21. ,   7. ],
       [ 10. ,  30. ,   7.5],
       [ 11. ,  41. ,   8. ],
       [ 12. ,  54. ,   8.5],
       [ 13. ,  69. ,   9. ],
       [ 14. ,  86. ,   9.5]])
In [49]:
input_data = np.random.random(50) * 2 * np.pi
response_data = np.sin(input_data)
In [52]:
pl.plot(input_data, response_data)
Out[52]:
[<matplotlib.lines.Line2D at 0x111f733d0>]
In [53]:
pl.scatter(input_data, response_data)
Out[53]:
<matplotlib.collections.PathCollection at 0x111f9e990>
In [54]:
index_order = input_data.argsort()
index_order
Out[54]:
array([ 5, 10, 17, 15, 23, 11, 25, 45, 14, 33, 39, 20, 48,  2, 47, 37, 31,
        1, 40, 28,  9,  4, 35, 41, 38, 27, 42,  3, 49, 43, 34, 19, 13,  6,
       46, 18,  0, 36, 24, 29, 44, 21, 16,  7,  8, 26, 32, 12, 22, 30])
In [55]:
pl.plot(input_data[index_order], response_data[index_order])
Out[55]:
[<matplotlib.lines.Line2D at 0x111fd5350>]

Pickles!

In [1]:
# TODO: Explain this in some detail
In [2]:
try:
    import cPickle as pickle  # Python 2 name of the C-accelerated pickler
except ImportError:
    import pickle  # Python 3 has a single pickle module

test = np.arange(1e7)

# This should take a bit
# Pickle streams are bytes, so the file is opened in binary mode.
with open('myjunkfile', 'wb') as filehandle:
    pickle.dump(test, filehandle)

# protocol=-1 asks for the highest protocol the running interpreter supports.
with open('myjunkfile2', 'wb') as filehandle:
    pickle.dump(test, filehandle, protocol=-1)

# pickle.load reads from the open file object (not from the array that was dumped).
# NOTE: only unpickle files you created yourself; loading a pickle can run arbitrary code.
with open('myjunkfile2', 'rb') as filehandle:
    new_variable = pickle.load(filehandle)
    

Generators

In [7]:
# A generator expression produces its values one at a time, on demand.
mygenerator = (x**2 for x in range(3))

# First pass: consumes the generator and prints 0, 1, 4.
for item in mygenerator:
    print item

# Second pass: prints nothing, because the generator is already exhausted.
for item in mygenerator:
    print item
    # Can't reuse a generator without re-creating it!
0
1
4
In [64]:
# some from: http://stackoverflow.com/questions/101268/hidden-features-of-python?lq=1
# Chaining comparison operators: `a < x < b` is evaluated as `(a < x) and (x < b)`
x = 5
print 1 < x < 10
print 10 < x < 20
print 10 > x <= 9
print 5 == x > 4
True
False
True
True
In [65]:
### Index slicing ###
a = np.arange(12)
# every second element (evaluated but not shown: only a cell's last expression is displayed)
a[::2]
# reverse: a step of -1 walks the array from the end to the start
a[::-1]
Out[65]:
array([11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])
In [66]:
for x in reversed(a):
    print x
    
11
10
9
8
7
6
5
4
3
2
1
0
In [13]:
print a[:]
print a[0:]
print a[-1]
print a[-2]
print a[1:-2]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
11
10
[1 2 3 4 5 6 7 8 9]

In-place value swapping

In [14]:
a = 10
b = 5
print a, b
a, b = b, a
print a, b
10 5
5 10
In [15]:
x = 5
#y = 5
y = 1
x = 3 if (y == 1) else 2
print x
3
In [56]:
# VECTORIZE
def test(a,b):
    if a>b:
        return a
    else:
        return b

print test(10,-3)
print test(-5,-3)
10
-3
In [57]:
#x = np.array(r_[-10:10:2])
x = np.arange(-10,10.1,2)
y = np.ones(x.shape)*-3.

print test(x,y) # error
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-57-d650998eb3ec> in <module>()
      3 y = np.ones(x.shape)*-3.
      4 
----> 5 print test(x,y) # error

<ipython-input-56-8b67af6e4ce4> in test(a, b)
      1 # VECTORIZE
      2 def test(a,b):
----> 3     if a>b:
      4         return a
      5     else:

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
In [58]:
#long way
out = np.empty(x.shape)
for i in range(x.size):
    out[i] = test(x[i],y[i])
print out

#short way
vtest = np.vectorize(test)
print vtest(x,3)

out = vtest(x,y)
print out
[ -3.  -3.  -3.  -3.  -2.   0.   2.   4.   6.   8.  10.]
[ 3  3  3  3  3  3  3  4  6  8 10]
[ -3.  -3.  -3.  -3.  -2.   0.   2.   4.   6.   8.  10.]
In [60]:
import itertools
def coolguys(name):
    """Classify a single name as 'cool' or 'lame'.

    Args:
        name: one name (a scalar string, not an array of names).

    Returns:
        'cool' for 'David' or 'Spider Man', 'lame' for anything else.
    """
    # A membership test is a logical check; no bitwise operators on booleans.
    if name in ('David', 'Spider Man'):
        return 'cool'
    else:
        return 'lame'

guys = np.array(['David','Jonathan', 'Fred','Bob','Steve','Spider Man'])

# np.vectorize wraps the scalar function so it is applied to every element.
vcool = np.vectorize(coolguys)  #vectorize the function, for simplicity
results = vcool(guys)

# The built-in zip pairs the two arrays on both Python 2 and Python 3.
for guy, result in zip(guys, results):
    print("{0} is {1}".format(guy, result))
David is cool
Jonathan is lame
Fred is lame
Bob is lame
Steve is lame
Spider Man is cool
In [63]:
coolness = {'name':guys, 'status':results} #turn the results into a dictionary

# save dictionary to a pickle for later use
# Pickle streams are bytes, so the file is written in binary mode ...
with open('whoiscool','wb') as ff:
    pickle.dump(coolness,ff)

# ... and read back in binary mode as well.
# NOTE: only unpickle files you trust; loading a pickle can run arbitrary code.
with open('whoiscool', 'rb') as fnew:
    data = pickle.load(fnew)


print(data) # Notice this is the data structure as saved!
{'status': array(['cool', 'lame', 'lame', 'lame', 'lame', 'cool'], 
      dtype='|S64'), 'name': array(['David', 'Jonathan', 'Fred', 'Bob', 'Steve', 'Spider Man'], 
      dtype='|S10')}