%pwd !wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip %ls !unzip facebook.zip %ls %cd facebook %bookmark fbdata %ls files = !ls -1 -S | grep .edges files import os from operator import itemgetter # Get the name and file size of all .edges files. files = [(file, os.stat(file).st_size) for file in os.listdir('.') if file.endswith('.edges')] # Sort the list with the second item (file size), # in decreasing order. files = sorted(files, key=itemgetter(1), reverse=True) # Only keep the first item (file name), in the same order. files = [file for (file, size) in files] !head -n5 {files[0]} %lsmagic %history? %history -l 5 # how many minutes in a day? 24 * 60 # and in a year? _ * 365 %%capture output %ls output.stdout %%bash cd .. touch _HEY ls rm _HEY cd facebook %%script ghci putStrLn "Hello world!" %%writefile myfile.txt Hello world! !more myfile.txt !rm myfile.txt %cd fbdata %ls from IPython.display import YouTubeVideo YouTubeVideo('j9YpkSX7NNM') from ipywidgets import interact # IPython.html.widgets before IPython 4.0 @interact(x=(0, 10)) def square(x): return("The square of %d is %d." % (x, x**2)) %cd fbdata %cd .. %%writefile egos.py import sys import os # We retrieve the folder as the first positional argument # to the command-line call if len(sys.argv) > 1: folder = sys.argv[1] # We list all files in the specified folder files = os.listdir(folder) # ids contains the list of idenfitiers identifiers = [int(file.split('.')[0]) for file in files] # Finally, we remove duplicates with set(), and sort the list # with sorted(). ids = sorted(set(identifiers)) %run egos.py facebook ids folder = 'facebook' %run egos.py %run -i egos.py ids import networkx networkx.Graph? %cd fbdata import networkx graph = networkx.read_edgelist('107.edges') len(graph.nodes()), len(graph.edges()) networkx.is_connected(graph) %timeit networkx.is_connected(graph) import networkx def ncomponents(file): graph = networkx.read_edgelist(file) return networkx.number_connected_components(graph) import glob def ncomponents_files(): return [(file, ncomponents(file)) for file in sorted(glob.glob('*.edges'))] for file, n in ncomponents_files(): print(file.ljust(12), n, 'component(s)') %timeit ncomponents_files() %prun -s cumtime ncomponents_files()