Ten Jupyter/IPython essentials

Using IPython as an extended shell

In [1]:
%pwd
Out[1]:
'/home/cyrille/minibook/chapter1'
In [2]:
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip
In [3]:
%ls
Out[3]:
facebook.zip  [...]
In [4]:
!unzip facebook.zip
In [5]:
%ls
Out[5]:
facebook  facebook.zip  [...]
In [6]:
%cd facebook
Out[6]:
/home/cyrille/minibook/chapter1/facebook
In [7]:
%bookmark fbdata
In [8]:
%ls
Out[8]:
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges
In [9]:
# Capture the output lines of the shell pipeline in the Python list `files`:
# `ls -1 -S` prints one name per line, largest file first, and grep keeps
# the names matching ".edges".
# NOTE(review): the dot is unescaped, so grep treats it as "any character".
files = !ls -1 -S | grep .edges
In [10]:
files
Out[10]:
['1912.edges',
 '107.edges',
 '1684.edges',
 '3437.edges',
 '348.edges',
 '0.edges',
 '414.edges',
 '686.edges',
 '698.edges',
 '3980.edges']
In [11]:
import os
from operator import itemgetter
# Pair every .edges file of the current directory with its size in
# bytes, ordered from the largest file to the smallest.
files = sorted(
    ((name, os.stat(name).st_size)
     for name in os.listdir('.')
     if name.endswith('.edges')),
    key=itemgetter(1),
    reverse=True,
)
# Strip the sizes: only the file names remain, still largest-first.
files = list(map(itemgetter(0), files))
In [12]:
!head -n5 {files[0]}
Out[12]:
2290 2363
2346 2025
2140 2428
2201 2506
2425 2557

Learning magic commands

In [13]:
%lsmagic
Out[13]:
Available line magics:
%alias  %alias_magic  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %install_default_config  %install_ext  %install_profiles  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %popd  %pprint  %precision  %profile  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%debug  %%file  %%html  %%javascript  %%latex  %%perl  %%prun  %%pypy  %%python  %%python2  %%python3  %%ruby  %%script  %%sh  %%svg  %%sx  %%system  %%time  %%timeit  %%writefile

Automagic is ON, % prefix IS NOT needed for line magics.
In [14]:
%history?
In [15]:
%history -l 5
Out[15]:
files = !ls -1 -S | grep .edges
files
!head -n5 {files[0]}
%lsmagic
%history?
In [16]:
# how many minutes in a day?
24 * 60
Out[16]:
1440
In [17]:
# and in a year?
_ * 365
Out[17]:
525600
In [18]:
%%capture output
%ls
In [19]:
output.stdout
Out[19]:
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges
In [20]:
%%bash
cd ..
touch _HEY
ls
rm _HEY
cd facebook
Out[20]:
_HEY
facebook
facebook.zip
[...]
In [21]:
%%script ghci
putStrLn "Hello world!"
Out[21]:
GHCi, version 7.6.3: http://www.haskell.org/ghc/  :? for help
Loading package ghc-prim ... linking ... done.
Loading package integer-gmp ... linking ... done.
Loading package base ... linking ... done.
Prelude> Hello world!
Prelude> Leaving GHCi.
In [22]:
%%writefile myfile.txt
Hello world!
Out[22]:
Writing myfile.txt
In [23]:
!more myfile.txt
Out[23]:
Hello world!
In [24]:
!rm myfile.txt

Mastering tab completion

In [25]:
%cd fbdata
%ls
Out[25]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook
0.circles    1684.circles  3437.circles  3980.circles  686.circles
0.edges      1684.edges    3437.edges    3980.edges    686.edges
107.circles  1912.circles  348.circles   414.circles   698.circles
107.edges    1912.edges    348.edges     414.edges     698.edges

Writing interactive documents in the Notebook with Markdown

Creating interactive widgets in the Notebook

In [26]:
from IPython.display import YouTubeVideo
YouTubeVideo('j9YpkSX7NNM')
In [27]:
from ipywidgets import interact  # IPython.html.widgets before IPython 4.0
@interact(x=(0, 10))
def square(x):
    """Return a sentence stating the square of ``x``.

    The ``interact`` decorator is given the range ``(0, 10)`` for ``x``
    and displays the returned string for the currently selected value.
    """
    message = "The square of %d is %d." % (x, x**2)
    return message
Out[27]:
'The square of 7 is 49.'

Running Python scripts from IPython

In [28]:
%cd fbdata
%cd ..
Out[28]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook
In [29]:
%%writefile egos.py
import os
import sys
# The folder to scan is the first command-line argument when one is
# given. Without an argument, `folder` must already exist in the
# namespace the script runs in (for example with `%run -i`); otherwise
# the listing below raises a NameError.
if len(sys.argv) >= 2:
    folder = sys.argv[1]
# Names of all the entries found in that folder.
files = os.listdir(folder)
# identifiers holds the integer that precedes the first dot of each
# file name (one entry per file, so duplicates are expected).
identifiers = [int(name.partition('.')[0]) for name in files]
# Deduplicate with set(), then order the result with sorted().
ids = sorted(set(identifiers))
Out[29]:
Overwriting egos.py
In [30]:
%run egos.py facebook
In [31]:
ids
Out[31]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]
In [32]:
folder = 'facebook'
In [33]:
%run egos.py
In [34]:
%run -i egos.py
In [35]:
ids
Out[35]:
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]

Introspecting Python objects

In [36]:
import networkx
In [37]:
networkx.Graph?

Debugging Python code

Benchmarking Python code

In [38]:
%cd fbdata
Out[38]:
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook
/home/cyrille/minibook/chapter1/facebook
In [39]:
import networkx
In [40]:
graph = networkx.read_edgelist('107.edges')
In [41]:
len(graph.nodes()), len(graph.edges())
Out[41]:
(1034, 26749)
In [42]:
networkx.is_connected(graph)
Out[42]:
True
In [43]:
%timeit networkx.is_connected(graph)
Out[43]:
100 loops, best of 3: 5.92 ms per loop

Profiling Python code

In [44]:
import networkx
In [45]:
def ncomponents(file):
    """Return the number of connected components of the graph in ``file``.

    ``file`` is handed to ``networkx.read_edgelist`` to load the graph,
    which is then counted with ``networkx.number_connected_components``.
    """
    return networkx.number_connected_components(
        networkx.read_edgelist(file))
In [46]:
import glob
def ncomponents_files():
    """Return ``(file name, component count)`` pairs for all edge files.

    Every ``*.edges`` file of the current directory is processed with
    ``ncomponents``, in sorted (lexicographic) file-name order.
    """
    results = []
    for path in sorted(glob.glob('*.edges')):
        results.append((path, ncomponents(path)))
    return results
In [47]:
# One report line per edge file: the name left-aligned in a 12-character
# column, followed by its number of connected components.
for file, n in ncomponents_files():
    print('%-12s' % file, n, 'component(s)')
Out[47]:
0.edges      5 component(s)
107.edges    1 component(s)
1684.edges   4 component(s)
1912.edges   2 component(s)
3437.edges   2 component(s)
348.edges    1 component(s)
3980.edges   4 component(s)
414.edges    2 component(s)
686.edges    1 component(s)
698.edges    3 component(s)
In [48]:
%timeit ncomponents_files()
Out[48]:
1 loops, best of 3: 634 ms per loop
In [49]:
%prun -s cumtime ncomponents_files()
Out[49]:
2391070 function calls in 1.038 seconds

Ordered by: cumulative time

ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1    0.000    0.000    1.038    1.038 {built-in method exec}
     1    0.000    0.000    1.038    1.038 <string>:1(<module>)
    10    0.000    0.000    0.995    0.100 <string>:1(read_edgelist)
    10    0.000    0.000    0.995    0.100 decorators.py:155(_open_file)
    10    0.376    0.038    0.995    0.099 edgelist.py:174(parse_edgelist)
170174    0.279    0.000    0.350    0.000 graph.py:648(add_edge)
170184    0.059    0.000    0.095    0.000 edgelist.py:366(<genexpr>)
    10    0.000    0.000    0.021    0.002 connected.py:98(number_connected_components)
    35    0.001    0.000    0.021    0.001 connected.py:22(connected_components)