In [1]:
from matplotlib import pyplot as plt

Hypergraphs

are matematical creatures which prove to be quite useful.

Mathoverflow question about their applications.

Generating hypergraphs

there are lots of types of hypergraphs, because hypergraphs are pretty general.

Hyperedges are arbitrary sets of nodes, and can therefore contain an arbitrary number of nodes. It is often better study hypergraphs where all hyperedges have the the same number of vertices in hyperedges. (In other words, it is a collection of sets of size k.) So a 2-uniform hypergraph is a graph, a 3-uniform hypergraph is a collection of unordered triples, and so on.

There are also matroids, greedoids and so on... but it gets little complicated.

Generating uniform hypergraphs

My method of generating uniform hypergraphs is pretty straightforward. Generate set of vertices and get some combinations as sets...

todo

  • wipe this notebook out and use new libs
  • write it with a good plan
In [1]:
import random
import math
import itertools

def generate_hypergraph(n=6, number_of_edges=None, k=3):
    vertices = range(1, n + 1)
    
    if number_of_edges is None:
        number_of_edges = int(math.sqrt(n))
    hyper_edges = random.sample(list(itertools.combinations(vertices, k)), number_of_edges)
    return (vertices, hyper_edges)
In [2]:
g = generate_hypergraph(number_of_edges=8)
In [3]:
import networkx as nx

def convert_to_nx_biparty_graph(vertices, hyper_edges):
    G = nx.Graph()
    G.add_nodes_from(vertices)
    G.add_nodes_from(hyper_edges)
    for e in hyper_edges:
        for n in e:
            G.add_edge(n, e)
    return G
        
In [4]:
G = convert_to_nx_biparty_graph(*g)
In [5]:
%matplotlib inline
nx.draw(G)
nx.draw(G,pos=nx.spring_layout(G, iterations=200))
from networkx.algorithms import bipartite
In [6]:
def draw_bipartite_graph(G, group_1, group_2):
    pos = {x:(0 , float(i % 20) * 2) for i, x in enumerate(group_1)}
    pos.update({node: (18.3, 0 + float(i % 20) * 2) for i, node in enumerate(group_2)})
    
    nx.draw(G, pos, node_color='m', node_size=800, with_labels=True, width=1.3, alpha=0.4)

def hipergraph_to_bipartite_parts(G):
    group_1 = (node for node in G.nodes() if not isinstance(node, tuple))
    group_2 = (node for node in G.nodes() if isinstance(node, tuple))
    return group_1, group_2

draw_bipartite_graph(G, *hipergraph_to_bipartite_parts(G))
In [7]:
hyper_edges = g[1]
In [8]:
hyper_edges
Out[8]:
[(1, 2, 4),
 (1, 4, 5),
 (4, 5, 6),
 (1, 3, 6),
 (2, 4, 6),
 (1, 3, 4),
 (3, 5, 6),
 (2, 5, 6)]
In [9]:
from itertools import chain

def convert_to_custom_hyper_G(nodes, edges):
    nodes = [node for node in edges if isinstance(node, tuple)]
    edges = list(chain(*[[(node_1, node_2) for x in range(len(set(node_1) & set(node_2)))] for node_1, node_2 in itertools.combinations(edges, 2) if set(node_1) & set(node_2)]))
    hyper_G = nx.Graph()
    hyper_G.add_nodes_from(nodes)
    hyper_G.add_edges_from(edges)
    return hyper_G
In [10]:
%matplotlib inline
hyper_G = convert_to_custom_hyper_G(*g)
nx.draw(hyper_G, layout=nx.circular_layout(G), node_size=3000, node_color='g', alpha=0.5)
In [11]:
nx.degree(hyper_G)
Out[11]:
{(2, 5, 6): 6,
 (3, 5, 6): 6,
 (1, 2, 4): 6,
 (1, 3, 4): 6,
 (1, 3, 6): 7,
 (2, 4, 6): 7,
 (4, 5, 6): 7,
 (1, 4, 5): 7}
In [12]:
nx.adjacency_matrix(hyper_G)
Out[12]:
matrix([[ 0.,  1.,  1.,  0.,  1.,  1.,  1.,  1.],
        [ 1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.],
        [ 1.,  0.,  0.,  1.,  1.,  1.,  1.,  1.],
        [ 0.,  1.,  1.,  0.,  1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.],
        [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.]])
In [13]:
nx.incidence_matrix(hyper_G)
Out[13]:
matrix([[ 1.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,
          1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,
          0.,  0.,  0.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,
          0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
          1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  0.],
        [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
          0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  1.],
        [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
          0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  1.]])
In [20]:
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter

def show_different_hipergraphs(n=10, k=3, parts=10):
    for fraction in np.linspace(0, 1, parts)[1:]:
        plt.figure(figsize=(6, 6))
        g = generate_hypergraph(n=n, k=k, number_of_edges=int(n * fraction))
        G = convert_to_nx_biparty_graph(*g)
        draw_bipartite_graph(G, *hipergraph_to_bipartite_parts(G))
        hyper_G = convert_to_custom_hyper_G(*g)

        p=nx.degree(hyper_G)

        plt.figure(figsize=(6, 6))
        nx.draw(
            hyper_G, node_color=p.values(),
            node_size=3000,
            cmap=plt.cm.Blues,
            alpha=0.6
        )
        
In [21]:
show_different_hipergraphs(10, 3, 4)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-21-864842f706f8> in <module>()
----> 1 show_different_hipergraphs(10, 3, 4)

<ipython-input-20-a6a09d85a617> in show_different_hipergraphs(n, k, parts)
     18             node_size=3000,
     19             cmap=plt.cm.Blues,
---> 20             alpha=0.6
     21         )
     22 

/home/att/Projects/hypergraph/py3/lib/python3.4/site-packages/networkx/drawing/nx_pylab.py in draw(G, pos, ax, hold, **kwds)
    124         plt.hold(h)
    125     try:
--> 126         draw_networkx(G,pos=pos,ax=ax,**kwds)
    127         ax.set_axis_off()
    128         plt.draw_if_interactive()

/home/att/Projects/hypergraph/py3/lib/python3.4/site-packages/networkx/drawing/nx_pylab.py in draw_networkx(G, pos, with_labels, **kwds)
    257         pos=nx.drawing.spring_layout(G) # default to spring layout
    258 
--> 259     node_collection=draw_networkx_nodes(G, pos, **kwds)
    260     edge_collection=draw_networkx_edges(G, pos, **kwds)
    261     if with_labels:

/home/att/Projects/hypergraph/py3/lib/python3.4/site-packages/networkx/drawing/nx_pylab.py in draw_networkx_nodes(G, pos, nodelist, node_size, node_color, node_shape, alpha, cmap, vmin, vmax, ax, linewidths, label, **kwds)
    379                                alpha=alpha,
    380                                linewidths=linewidths,
--> 381                                label=label)
    382 
    383     node_collection.set_zorder(2)

/usr/lib/python3/dist-packages/matplotlib/axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, **kwargs)
   6278                 colors = None  # use cmap, norm after collection is created
   6279             else:
-> 6280                 colors = mcolors.colorConverter.to_rgba_array(c, alpha)
   6281 
   6282         faceted = kwargs.pop('faceted', None)

/usr/lib/python3/dist-packages/matplotlib/colors.py in to_rgba_array(self, c, alpha)
    379         except TypeError:
    380             raise ValueError(
--> 381                 "Cannot convert argument type %s to rgba array" % type(c))
    382         try:
    383             if nc == 0 or c.lower() == 'none':

ValueError: Cannot convert argument type <class 'numpy.ndarray'> to rgba array
In [22]:
A = np.array([[1, 2], [2, 0]])
min(A.sum(axis=0))
Out[22]:
2
In [23]:
def create_balanced_markov_matrix(edges):
    a_matrix = np.zeros([len(edges), len(edges)]) * 10
    for i, edge_1 in enumerate(edges):
        for j, edge_2 in enumerate(edges):
            a_matrix[i][j] = len(set(edge_1) & set(edge_2))
            a_matrix[j][i] = a_matrix[i][j]
    sums = a_matrix.sum(axis=0)
    min_sum = min(sums)
    for i, col_sum in enumerate(sums):
        a_matrix[i][i] -= col_sum - min_sum
    a_matrix *= 1.0 /min_sum
    return a_matrix

def create_markov_matrix(edges):
    a_matrix = np.eye(len(edges))
    
    for i, edge_1 in enumerate(edges):
        for j, edge_2 in enumerate(edges):
            a_matrix[i][j] = len(set(edge_1) & set(edge_2))
            a_matrix[j][i] = a_matrix[i][j]
    
    for i, edge_1 in enumerate(edges):
        a_matrix[i] = a_matrix[i] / sum(a_matrix[i]) * 1.1
   
    return a_matrix
In [24]:
create_markov_matrix(g[1])
Out[24]:
array([[ 0.275     ,  0.18333333,  0.09166667,  0.09166667,  0.18333333,
         0.18333333,  0.        ,  0.09166667],
       [ 0.16923077,  0.25384615,  0.16923077,  0.08461538,  0.08461538,
         0.16923077,  0.08461538,  0.08461538],
       [ 0.07857143,  0.15714286,  0.23571429,  0.07857143,  0.15714286,
         0.07857143,  0.15714286,  0.15714286],
       [ 0.09166667,  0.09166667,  0.09166667,  0.275     ,  0.09166667,
         0.18333333,  0.18333333,  0.09166667],
       [ 0.16923077,  0.08461538,  0.16923077,  0.08461538,  0.25384615,
         0.08461538,  0.08461538,  0.16923077],
       [ 0.18333333,  0.18333333,  0.09166667,  0.18333333,  0.09166667,
         0.275     ,  0.09166667,  0.        ],
       [ 0.        ,  0.09166667,  0.18333333,  0.18333333,  0.09166667,
         0.09166667,  0.275     ,  0.18333333],
       [ 0.09166667,  0.09166667,  0.18333333,  0.09166667,  0.18333333,
         0.        ,  0.18333333,  0.275     ]])
In [25]:
import numpy as np
from numpy.random import random_sample

def weighted_values(values, probabilities, size):
    bins = np.add.accumulate(probabilities)
    bins[-1] = max(1, bins[-1])
    index = np.digitize(random_sample(size), bins)
    return values[index]

def step_diffusion(current_state, markov_matrix):
    values_size = len(current_state)
    values = range(values_size)
    probs = markov_matrix.dot(current_state)
    next_state = np.zeros(values_size)
    next_state[weighted_values(values, probs, 1)] = 1
    
    return next_state
In [25]:
 
In [26]:
markov_matrix = create_markov_matrix(g[1])
current_state = np.zeros(markov_matrix.shape[0])
current_state[0] = 1
print(current_state)
print(markov_matrix)
[ 1.  0.  0.  0.  0.  0.  0.  0.]
[[ 0.275       0.18333333  0.09166667  0.09166667  0.18333333  0.18333333
   0.          0.09166667]
 [ 0.16923077  0.25384615  0.16923077  0.08461538  0.08461538  0.16923077
   0.08461538  0.08461538]
 [ 0.07857143  0.15714286  0.23571429  0.07857143  0.15714286  0.07857143
   0.15714286  0.15714286]
 [ 0.09166667  0.09166667  0.09166667  0.275       0.09166667  0.18333333
   0.18333333  0.09166667]
 [ 0.16923077  0.08461538  0.16923077  0.08461538  0.25384615  0.08461538
   0.08461538  0.16923077]
 [ 0.18333333  0.18333333  0.09166667  0.18333333  0.09166667  0.275
   0.09166667  0.        ]
 [ 0.          0.09166667  0.18333333  0.18333333  0.09166667  0.09166667
   0.275       0.18333333]
 [ 0.09166667  0.09166667  0.18333333  0.09166667  0.18333333  0.
   0.18333333  0.275     ]]
In [27]:
current_state[0] = 1

from collections import Counter

c = Counter()
states = []
t_max = 30
for _ in range(t_max):
    current_state = step_diffusion(current_state, markov_matrix)
    visited_hyperedge = current_state.tolist().index(1)
    c[visited_hyperedge] += 1
    states.append(visited_hyperedge)
In [28]:
c.most_common()
Out[28]:
[(3, 7), (4, 6), (5, 5), (0, 4), (6, 4), (7, 2), (1, 1), (2, 1)]
In [29]:
y, x = c.keys(), c.values()
print(list(x), y)
[4, 1, 1, 7, 6, 5, 4, 2] dict_keys([0, 1, 2, 3, 4, 5, 6, 7])
In [30]:
from matplotlib import pyplot as plt
In [31]:
plt.hist(states, normed=True)
Out[31]:
(array([ 0.19047619,  0.04761905,  0.04761905,  0.        ,  0.33333333,
         0.28571429,  0.        ,  0.23809524,  0.19047619,  0.0952381 ]),
 array([ 0. ,  0.7,  1.4,  2.1,  2.8,  3.5,  4.2,  4.9,  5.6,  6.3,  7. ]),
 <a list of 10 Patch objects>)
In [32]:
g[1][3]
Out[32]:
(1, 3, 6)
In [33]:
n = 20
k = 3
fraction = 1.0 / 2
g = generate_hypergraph(n=n, k=k, number_of_edges=int(n * fraction))
G = convert_to_nx_biparty_graph(*g)
draw_bipartite_graph(G, *hipergraph_to_bipartite_parts(G))
hyper_G = convert_to_custom_hyper_G(*g)

print("Hyperg")
print(hyper_G)
p=nx.degree(hyper_G)

plt.figure(figsize=(6, 6))


markov_matrix = create_markov_matrix(g[1])
Hyperg

<matplotlib.figure.Figure at 0x7f07a48b7a58>
In [34]:
print markov_matrix
print g[1]
  File "<ipython-input-34-880cfd421d4f>", line 1
    print markov_matrix
                      ^
SyntaxError: invalid syntax
In [35]:
from collections import Counter
current_state = np.zeros(markov_matrix.shape[0])
t_max = 100000
current_state[2] = 1
def simulate(current_state, markov_matrix, t_max):
    c = Counter()
    states = []
    for _ in xrange(t_max):
        current_state = step_diffusion(current_state, markov_matrix)
        visited_hyperedge = current_state.tolist().index(1)
        c[visited_hyperedge] += 1
        states.append(visited_hyperedge)
    return c.most_common(), states

most_common, states = simulate(current_state, markov_matrix, t_max)

print markov_matrix
print most_common
  File "<ipython-input-35-83ff2fbefed3>", line 17
    print markov_matrix
                      ^
SyntaxError: invalid syntax
In [36]:
plt.hist(states, alpha=.5);
In [37]:
def count_nodes(nodes, edges, occurences):
    c = Counter()
    for index, count in occurences:
        for edge in edges[index]:
            c[edge] += count
    for node in nodes:
        if node not in c:
            c[node] = 0
    return c
In [39]:
most_common_nodes = count_nodes(g[0], g[1], most_common)

plt.plot(
    most_common_nodes.keys(),
    most_common_nodes.values(),
    'm:o',
    alpha=0.7,
    linewidth=5
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-39-3cc5ef1d1ace> in <module>()
----> 1 most_common_nodes = count_nodes(g[0], g[1], most_common)
      2 
      3 plt.plot(
      4     most_common_nodes.keys(),
      5     most_common_nodes.values(),

NameError: name 'most_common' is not defined
In [38]:
 
In [34]: