#!/usr/bin/env python # coding: utf-8 # # Graphs # Often in computation we have __data__ from the world, and a __question__ we want to answer about these data. # To do so, we need to find a __model__ for the data, and a way to translate our question into a __mathematical question about the model__. # Here are some examples: # # * Suppose you have a map of Addis Ababa and want to find out the fastest way to get from the national museum to Merkato. # # * Suppose you are Facebook and you are trying to figure out how many friends of friends the average Ethiopian has. # # ![title](addis_map.jpg) # What is perhaps most surprising is that these and many other questions all use the same mathematical model of a __graph__. # A __graph__ is just a way to store __connections__ between pairs of entities: # # * The graph of Addis's roads could be composed of all street intersections, with a connection between intersection $u$ and intersection $v$ if they are directly connected by a road. # # * The Facebook graph is composed of all Facebook users, with a connection between user $u$ and user $v$ if they are friends. # # * The gene-symptom interaction graph is composed of all genes and all "symptoms" (also known as phenotypes: some observable differences in people), where gene $u$ is connected to symptom $v$ if there is a correlation between people having the gene $u$ and symptom $v$. # Mathematically, a graph is a set $V$ of __vertices__ and a set $E$ of pairs of these vertices which is known as the set of __edges__. We say that a vertex $u\in V$ is connected to $v\in V$ if the pair $(u,v)$ is in $E$. # A graph where $(u,v)\in E$ if and only if $(v,u)\in E$ is known as an __undirected__ graph. Undirected graphs form an important special case, and we will mostly be interested in those graphs.
# Sometimes the edges (or vertices) of the graph are __labeled__ (often by a number), for example in the case of the road network, we might label every road segment with the average time it takes to travel from one end to the other.

# There are two main representations for graphs. We can always assume the vertices are simply identified by the numbers $1$ to $n$ for some $n$.
#
# The __adjacency list representation__ is an array $L$ where $L[i]$ is the list of all neighbors of the vertex $i$ (i.e., all $j$ such that $(i,j)\in E$)
#
# The __adjacency matrix representation__ is an $n\times n$ two-dimensional array $M$ (i.e., matrix) such that $M[i][j]$ equals $1$ if $j$ is a neighbor of $i$ and equals $0$ otherwise.

# ### Questions

# * If a graph has $n$ vertices and $m$ edges - how big is its adjacency list representation? how big is its adjacency matrix representation?
# * Given a graph $G$ on $n$ vertices and two vertices $i,j$, how long can it take us (in the worst case) to find out if $j$ is a neighbor of $i$ when $G$ is represented in the adjacency list form? How long will it take in the adjacency matrix form?

# In[2]:


get_ipython().run_line_magic('', "run 'boaz_utils.ipynb'")
# A directed 4-cycle: 0 -> 1 -> 2 -> 3 -> 0.
G = [[1], [2], [3], [0]]
draw_graph(G)


# In[8]:


# A star: vertex 0 is linked to vertices 1..6, and each of them links back to 0.
G = [[1, 2, 3, 4, 5, 6], [0], [0], [0], [0], [0], [0]]
draw_graph(G)


# In[6]:


# A directed cycle on n vertices: i -> (i+1) mod n.
n = 20
G = [[(i + 1) % n] for i in range(n)]
draw_graph(G)


# In[7]:


def grid_neighbors(i,j,n):
    """Return the out-neighbors of cell (i, j) in an n-by-n grid graph.

    Cells are numbered row by row, so cell (i, j) has index ``i*n + j``.
    Every cell points to the cell with the next ``j`` in the same row and to
    the cell with the next ``i`` in the same column, whenever those exist.

    Returns a list of vertex indices: empty for the last cell (n-1, n-1),
    one entry on the last row or last column, two entries otherwise.
    """
    last = n - 1
    on_last_row = (i == last)
    on_last_col = (j == last)

    if on_last_row:
        # No row below: at most the next cell in the same row remains.
        return [] if on_last_col else [i * n + j + 1]
    if on_last_col:
        # No column further along: only the cell in the next row remains.
        return [(i + 1) * n + j]
    return [n * i + ((j + 1) % n), n * ((i + 1) % n) + j]


# In[8]:


n = 5
# Row-major enumeration of the cells, matching the indices grid_neighbors returns.
G = [grid_neighbors(i, j, n) for i in range(n) for j in range(n)]


# In[9]:


draw_graph(G, 'grid_layout')


# # Memoization applied to graphs: shortest paths

# Suppose we have a graph representing an airport network, a road network, etc. For example consider an airport network: vertices are airports, and edges represent direct flights from one airport to the next. Each edge now has an associated length corresponding to the length of the flight.
Now how can we find the shortest path from one vertex to the others in such a graph? This can be done using recursion and memoization. The non-recursive, iterative implementation of this approach (that is, loops instead of recursion) is called the Bellman-Ford algorithm. # The basic idea is to create a recursive function ```shortestPathHelper```($x$, $y$, $t$) which finds the shortest path from $x$ to $y$ which takes at most $t$ steps. One option is that it is the same as the shortest path taking at most $t − 1$ steps, and the other is that we should travel to some vertex $z$ first in $t − 1$ steps then take the edge $(z, y)$ in the $t^{th}$ step. We recurse on both options and take the better of the two, and we use memoization to make the function faster. Note that if it’s possible to get from $x$ to $y$ at all, then it is possible to do so in $n − 1$ steps, where the graph has $n$ vertices, so the length of the shortest path from $x$ to $y$ is ```shortestPathHelper```($x, y, n-1$). # In[19]: # returns length of shortest path from x to y using at most t steps # B is an inverse adjacency list. 
# That is, B[y] is a list [ [x0,w0], ..., [xk, wk] ]
# such that for each i there is an edge (xi, y) with weight wi
def shortestPathHelper(B, x, y, t, mem, seen):
    """Return the length of the shortest x -> y path that uses at most t edges.

    B     -- inverse adjacency list: B[v] holds [u, w] entries, one for every
             edge (u, v) of weight w.
    x, y  -- source and target vertex indices.
    t     -- maximum number of edges the path may use.
    mem   -- table of cached answers; mem[v][s] is written once the answer
             for target v with budget s has been computed.
    seen  -- table of flags; seen[v][s] says whether mem[v][s] is valid.

    Both tables are updated in place. The result is float('infinity') when y
    cannot be reached from x within t edges.
    """
    if t == 0:
        # With no edges left, only the trivial path from x to itself exists.
        return 0 if x == y else float('infinity')
    if seen[y][t]:
        return mem[y][t]
    seen[y][t] = True

    # Option 1: the best path already fits within t-1 edges.
    best = shortestPathHelper(B, x, y, t - 1, mem, seen)

    # Option 2: reach some predecessor of y within t-1 edges, then take the
    # final edge (pred, y).
    for edge in B[y]:
        pred, weight = edge[0], edge[1]
        candidate = weight + shortestPathHelper(B, x, pred, t - 1, mem, seen)
        if candidate < best:
            best = candidate

    mem[y][t] = best
    return best


def shortestPath(A, x, y):
    """Return the length of the shortest path from x to y.

    A is the adjacency list of the graph: A[u][i][0] is the i-th neighbor of
    vertex u, and A[u][i][1] is the weight of the edge (u, A[u][i][0]).

    The result is float('infinity') when no path from x to y is found within
    len(A) - 1 edges.
    """
    n = len(A)

    # One row per vertex and one column per edge budget 0..n. An entry of
    # `mem` is only meaningful once the matching entry of `seen` is True.
    mem = [[0] * (n + 1) for _ in range(n)]
    seen = [[False] * (n + 1) for _ in range(n)]

    # Reverse every edge: B[v] collects [u, w] for each edge (u, v) of weight w.
    B = [[] for _ in range(n)]
    for u in range(n):
        for edge in A[u]:
            # edge is the pair [v, length(u, v)]
            B[edge[0]].append([u, edge[1]])

    return shortestPathHelper(B, x, y, n - 1, mem, seen)


# We can also memoize automatically, as we did in the morning lecture.
# In[15]:


def memoize(func):
    """Return a caching wrapper around ``func``.

    Results are stored in a dictionary keyed by ``repr`` of the positional
    arguments, so unhashable arguments such as lists are accepted. The
    dictionary is created once per wrapper and is never cleared; a caller may
    pass its own dictionary through the keyword-only ``calls`` argument.

    NOTE: the key is rebuilt with ``repr`` on every call, so the cost of a
    lookup grows with the size of the arguments.
    """
    from functools import wraps

    mem = dict()

    @wraps(func)  # keep the wrapped function's name and docstring
    def f(*params, calls=mem):
        key = repr(params)
        if key not in calls:
            calls[key] = func(*params)
        return calls[key]

    return f


@memoize  # memoize automatically
def shortestPathRecurse(B, x, y, t):
    """Return the length of the shortest x -> y path that uses at most t edges.

    B is an inverse adjacency list: B[v] holds [u, w] entries, one for every
    edge (u, v) of weight w. The result is float('infinity') when y cannot be
    reached from x within t edges.

    The recursive calls go through the module-level name, which is bound to
    the memoized wrapper, so sub-results are cached.
    """
    if t == 0:
        if x == y:
            return 0
        return float('infinity')

    # first option: do it in t-1 steps
    ans = shortestPathRecurse(B, x, y, t - 1)

    # second option: go to a vertex z that has an edge to y first, in
    # at most t-1 steps, then take the edge (z, y)
    for p in B[y]:
        z = p[0]
        weight = p[1]
        val = shortestPathRecurse(B, x, z, t - 1)
        ans = min(ans, weight + val)
    return ans


def shortestPath2(A, x, y):
    """Return the length of the shortest path from x to y.

    A is the adjacency list of the graph: A[u][i][0] is the i-th neighbor of
    vertex u, and A[u][i][1] is the weight of the edge (u, A[u][i][0]).
    The work is delegated to the memoized ``shortestPathRecurse`` with a
    budget of len(A) - 1 edges.
    """
    # B is an inverse adjacency list. B[i] is a list of lists.
    # Each element of B[i] is a list [x, w] representing that (x,i)
    # is an edge in the graph, and w is its length
    B = [[] for _ in range(len(A))]
    for i in range(len(A)):
        for p in A[i]:
            # p is the pair [j, length(i,j)]
            B[p[0]].append([i, p[1]])

    return shortestPathRecurse(B, x, y, len(A) - 1)


# # Example
#
# ![title](bellmanford.jpg)

# In[3]:


# as an adjacency list with weights
A = [
    [[2, 4]],  # (0,2) with weight 4
    [[3, 2]],  # (1,3) with weight 2
    [[1, 1], [3, 4]],  # (2,1) with weight 1, (2,3) with weight 4
    [[4, 7]],  # (3,4) with weight 7
    []  # vertex 4 has no outgoing edges
]


# In[20]:


shortestPath(A, 0, 2)


# In[24]:


# there are two routes
# 0->2->3 and 0->2->1->3
# 0->2->3 has total length 4+4 = 8
# 0->2->1->3 has total length 4+1+2 = 7
shortestPath(A, 0, 3)


# In[5]:


shortestPath(A, 3, 0)


# # Finding the actual path
#
# Typically we don't just want to know the length of the shortest path. We want to know what the shortest path is itself! What route should we take? Just as we remembered the choices we made for going to parties yesterday, we can similarly here also remember the choices we made. Below is a modified version of the above code which remembers our choices.
# In[6]:


# returns length of shortest path from x to y using at most t steps
# B is an inverse adjacency list. That is, B[y] is a list [ [x0,w0], ..., [xk, wk] ]
# such that for each i there is an edge (xi, y) with weight wi
def shortestPathHelper(B, x, y, t, mem, seen, choices):
    """Return ``(length, choices)`` for the shortest x -> y path of at most t edges.

    B       -- inverse adjacency list: B[v] holds [u, w] entries, one for
               every edge (u, v) of weight w.
    mem     -- table of cached lengths; mem[v][s] is valid once seen[v][s]
               is True.
    seen    -- table of flags marking which (vertex, budget) pairs are done.
    choices -- table recording the decision taken for each (vertex, budget):
               -1 means "the same path already fits in budget - 1 edges",
               any other value is the predecessor vertex used for the last
               edge.

    All three tables are updated in place; the ``choices`` object that is
    returned is the very one that was passed in. The length is
    float('infinity') when y is unreachable within t edges.
    """
    if t == 0:
        # With no edges left, only the trivial path from x to itself exists.
        base = 0 if x == y else float('infinity')
        return base, choices
    if seen[y][t]:
        return mem[y][t], choices
    seen[y][t] = True

    # Option 1: stay with the best path of at most t-1 edges (recorded as -1).
    choices[y][t] = -1
    best = shortestPathHelper(B, x, y, t - 1, mem, seen, choices)[0]

    # Option 2: reach a predecessor within t-1 edges, then take the edge
    # (pred, y). Only a strictly cheaper route replaces the current choice.
    for edge in B[y]:
        pred = edge[0]
        weight = edge[1]
        via_pred = shortestPathHelper(B, x, pred, t - 1, mem, seen, choices)[0]
        candidate = weight + via_pred
        if candidate < best:
            choices[y][t] = pred
            best = candidate

    mem[y][t] = best
    return best, choices


def shortestPath(A, x, y):
    """Return ``(length, choices)`` for the shortest path from x to y.

    A is the adjacency list of the graph: A[u][i][0] is the i-th neighbor of
    vertex u, and A[u][i][1] is the weight of the edge (u, A[u][i][0]).

    ``length`` is float('infinity') when no path is found within len(A) - 1
    edges; ``choices`` is the decision table filled in by
    ``shortestPathHelper``.
    """
    n = len(A)

    # One row per vertex and one column per edge budget 0..n. An entry of
    # `mem` or `choices` is only meaningful once the matching entry of
    # `seen` is True.
    mem = [[0] * (n + 1) for _ in range(n)]
    seen = [[False] * (n + 1) for _ in range(n)]
    choices = [[0] * (n + 1) for _ in range(n)]

    # Reverse every edge: B[v] collects [u, w] for each edge (u, v) of weight w.
    B = [[] for _ in range(n)]
    for u in range(n):
        for edge in A[u]:
            # edge is the pair [v, length(u, v)]
            B[edge[0]].append([u, edge[1]])

    return shortestPathHelper(B, x, y, n - 1, mem, seen, choices)


def findPath(A, x, y):
    """Return ``[path, length]`` for a shortest path from x to y.

    If no path exists the result is ``[[], float('infinity')]``. Otherwise
    ``path`` is a list of vertices starting at x and ending at y, and
    ``length`` is the total weight of that path.
    """
    length, choices = shortestPath(A, x, y)
    if length == float('infinity'):
        return [[], length]

    # Walk the recorded decisions backwards from y, spending one unit of
    # budget per step; the vertices are therefore collected in reverse order.
    backwards = [y]
    current = y
    for t in range(len(A) - 1, 0, -1):
        step = choices[current][t]
        if step != -1:
            backwards.append(step)
            current = step

    backwards.reverse()
    return [backwards, length]


# ![title](bellmanford.jpg)

# In[7]:


findPath(A, 0, 3)