#!/usr/bin/env python # coding: utf-8 # # [NTDS'19] tutorial 4: Manipulating graphs with NetworkX # [ntds'19]: https://github.com/mdeff/ntds_2019 # # [Effrosyni Simou](https://people.epfl.ch/effrosyni.simou), [EPFL LTS4](https://lts4.epfl.ch). # Adapted from [NTDS'17 NetworkX demo](https://nbviewer.jupyter.org/github/mdeff/ntds_2017/blob/outputs/demos/04_networkx.ipynb). # In this session we will get introduced to NetworkX, explore some of the most common network models, look at their basic properties and compare them. # ## 1 Creating graphs using network models # # There are many libraries that deal with creation and manipulation of graph data. # We will use NetworkX to create basic network models, as they are already implemented in the library. # The full documentation of NetworkX 2.3 (installed in your `ntds_2019` environment) can be found [online](https://networkx.github.io/documentation/stable/). # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import collections import numpy as np from scipy import spatial from matplotlib import pyplot as plt import networkx as nx # Create an Erdős-Rényi graph with $N=100$ vertices, and a probability of connecting each pair of vertices equal to $p=0.15$. # In[2]: N = 100 # number of nodes p = 0.15 # probability of connection er = nx.erdos_renyi_graph(N, p) # You can retrieve the adjacency matrix of the graph, from the `Graph` object `er` as follows: # In[3]: er_adj = nx.adjacency_matrix(er, range(N)) er_adj = er_adj.todense() # You can now visualise the adjacency matrix: # In[4]: plt.spy(er_adj); # ## 2 Plotting graphs # # With NetworkX and Matplotlib we can also plot a graph. For example, we can plot the Erdős-Rényi graph that we created before as follows: # In[5]: nx.draw(er) # ### 2.1 Exercise # # Create a Barabasi-Albert graph and a Watts-Strogatz graph and plot them. # In[ ]: # Create a Barabasi-Albert graph. ba = # your code here # In[ ]: # Create a Watts-Strogartz graph. ws = # your code here # ## 3 Modifying graphs # # It's easy to add or remove edges, but also nodes. If we add an edge between nodes that don't yet exist, they will be automatically created. # In[6]: er.add_node(100) # In[7]: er.nodes() # Similarly, you can add and remove a collection of nodes or edges, and add and remove one node or edge: # * Adding nodes with: # * `G.add_node`: One node at a time # * `G.add_nodes_from`: A container of nodes # * Adding edges with: # * `G.add_edge`: One edge at a time # * `G.add_edges_from`: A container of edges # * Removing nodes with: # * `G.remove_node`: One node at a time # * `G.remove_nodes_from`: A container of nodes # * Removing edges with: # * `G.remove_edge`: One edge at a time # * `G.remove_edges_from`: A container of edges # # You can get the number of edges with `G.size()`. # Add an edge between two non-existant vertices. Remove all nodes up to node 50. Draw the graph after each change. # In[8]: er.add_edge(101, 102) nx.draw(er) # In[9]: er.remove_nodes_from(range(50)) nx.draw(er) er.nodes() # In[10]: er.size() # ## 4 Degree distribution # `G.degree()` returns a ``DegreeView`` object with pairs of nodes and their degree. # If we specify a node, `G.degree(node)` will return the degree of that node. # Create an Erdős-Rényi network and plot a histogram of node degrees. # In[11]: N = 100 # number of nodes p = 0.15 # probability of connection er = nx.erdos_renyi_graph(N, p) # In[12]: d = er.degree() # In[13]: print(d) # In[14]: # Erdős-Rényi node degree histogram. degree_sequence = sorted([d for n, d in er.degree()], reverse=True) # degree sequence degreeCount = collections.Counter(degree_sequence) deg, cnt = zip(*degreeCount.items()) fig, ax = plt.subplots() ax.bar(deg, cnt) ax.set_title("Degree Histogram") ax.set_ylabel("Count") ax.set_xlabel("Degree"); # ### 4.1 Fitting a distribution # # Try to fit a Poisson distribution. # In[15]: # Poisson distribution. def poisson(mu, k): return np.exp(-mu) * mu**k * (np.math.factorial(k)**-1) # In[16]: # Erdős-Rényi node degree histogram. degree_sequence = sorted([d for n, d in er.degree()], reverse=True) # degree sequence degreeCount = collections.Counter(degree_sequence) deg, cnt = zip(*degreeCount.items()) fig, ax = plt.subplots() ax.bar(deg, cnt, label='Histogram') # Poisson distribution mu = 2 * er.size() / 100 k = np.linspace(1, 25, 25) deg = [100 * poisson(mu, i) for i in k] ax.plot(k, deg, color='r', label='Poisson distribution') ax.legend() ax.set_title("Degree Histogram") ax.set_ylabel("Count") ax.set_xlabel("Degree"); # ### 4.2 Exercise # # Do it for the Barabasi-Albert and Watts-Strogatz networks. # In[ ]: # your code here # ## 5 Creating a graph that approximates a manifold # # We can represent data laying on a manifold (sampled from a manifold) as a graph of connected samples. # # Generate 100 two-dimensional data points from a uniform random distribution in $[0, 1]$. # These will be the coordinates of your nodes. # In[17]: N = 100 nodes_coords = np.random.rand(N, 2) # Two nodes are connected if the Euclidean distance between them is smaller than a threshold. # In that case, the weight of the edge is set to # $$w(i,j) = \exp \left( -{\frac{\operatorname{dist}^2(i,j)}{2\sigma^2}} \right),$$ # for some *kernel width* $\sigma$. # In[18]: sigma = 0.9 threshold = 0.2 # In[19]: def gaussian_kernel(dist, sigma): return np.exp(-dist**2 / (2*sigma**2)) # In[20]: dist = spatial.distance.pdist(nodes_coords, metric='euclidean') dist = spatial.distance.squareform(dist) adj = gaussian_kernel(dist, sigma) adj -= np.identity(N) adj[dist > threshold] = 0 # In[21]: plt.spy(adj); # Plot the graph with NetworkX. # # Hints: # * `nx.from_numpy_array(adj)` creates a graph object from an adjacency matrix (in numpy form) # * `nx.draw(G,pos)` will draw vertices at coordinates specified in pos. Variable pos is a dictionary assigning a pair of coordinates to each node. # In[22]: g = nx.from_numpy_matrix(adj) plt.spy(nx.adjacency_matrix(g).todense()); # In[23]: pos = dict(zip(range(N), nodes_coords)) nx.draw(g, pos) # Plot a degree distribution of this graph. What can you say about the distribution? # In[24]: # node degree histogram degree_sequence = sorted([d for n, d in g.degree()], reverse=True) # degree sequence degreeCount = collections.Counter(degree_sequence) deg, cnt = zip(*degreeCount.items()) fig, ax = plt.subplots() ax.bar(deg, cnt) ax.set_title("Degree Histogram") ax.set_ylabel("Count") ax.set_xlabel("Degree");