#!/usr/bin/env python
# coding: utf-8
# ## Plotly interactive Human Disease Network ##
# The human disease network (HDN) is represented by a graph whose nodes (vertices) are diseases (disorders).
# Two disorders are connected if they share a genetic component.
#
# The data source for this network is a `gexf` [file](https://github.com/gephi/gephi.github.io/blob/master/datasets/diseasome.gexf.zip),
# that contains information provided in the original [study of the HDN](http://www.pnas.org/content/104/21/8685).
#
# I parsed and converted the `gexf` file to a `gml` format,
# to be read with the Python graph library, `igraph`. To reduce edge clutter
# I slightly modified 55 node positions (see the new positions in this [md file](https://github.com/empet/Human-Disease-Network/blob/master/new-positions-HDN.md)).
# The node attributes recorded in the `gml` file are as follows:
#
# ```
# node
# [
# id 0
# name "30"
# color "rgb(238, 68, 68)"
# disclass "Neurological"
# label "Alzheimer disease"
# position "(-116.486664, -95.29654)"
# type "disease"
# size 17.46866
# ]
# ```
#
#
# The HDN displays 516 disorders clustered in 22 classes. The nodes in the same class have a common color, and the size of each node is proportional
# to the number of genes that are known to be associated to the corresponding disorder.
#
# The HDN has links/connections between both disorders in the same class and distinct classes.
#
#
# *The HDN is a connected graph suggesting that the genetic origins of most diseases, to some extent, are shared with other diseases.* [[http://www.pnas.org/content/104/21/8685](http://www.pnas.org/content/104/21/8685)].
# In the following we define the HDN as an `igraph.Graph` instance, and extract from its attributes data to define the Plotly graphical object representing this network.
#
# Usually to an `igraph.Graph` one associates, via a layout algorithm, a spatial position for each node.
# In this case however the node positions are prescribed, i.e. they are given in the `gml` file.
# On the other hand we define the Plotly plot layout, as a dict.
# In[1]:
import platform
print(f'Python version: {platform.python_version()}')
# In[2]:
import plotly
plotly.__version__
# In[3]:
import igraph as ig
import numpy as np
import ast
# In[26]:
from chart_studio.plotly import iplot
import plotly.graph_objs as go
import plotly.io as pio
# Read the `gml` file as an `igraph.Graph`:
# In[8]:
K = ig.Graph.Read_GML('human-disease.gml')
# Define the following function to check whether a network is connected or not:
# In[9]:
def get_graph_components(G, ret=False):
comp = G.components()
if ret:
return comp, len(comp)
else:
return len(comp)
# In[10]:
d_classes = list(set([v['disclass'] for v in K.vs]))
disorder_classes = sorted(d_classes)
# Basic information on the graph K:
# In[11]:
print ('Number of vertices: ', len(K.vs), '\nNumber of edges: ', len(K.es),
'\nDisorder classes: ', len(disorder_classes),
'\nNumber of components: ', get_graph_components(K),
'\n\nDisorder classes: ', disorder_classes)
# Let us compute the eigenvalue centrality of the disease network nodes:
# In[13]:
v = K.evcent(directed=False)
centralities = np.array(v)
I = np.argsort(centralities)[::-1]
# Extract the top twenty diseases, according to their eigenvalue centralities:
# In[14]:
top_20 = [K.vs[j]['label'] for j in I[:20]]
list(zip(top_20, np.sort(centralities)[::-1][:20]))
# Annotation text for data source:
# In[15]:
annotation_text = "HDN: [1] "+\
" Data source: [2]"
# The following function extracts from the graph K, the data needed to plot it via Plotly:
# In[27]:
def get_Plotly_netw(G, graph_layout, title='Plotly Interactive Human Disease Network',
source=annotation_text, flip='lr', width=950, height=850):
#G is an igraph.Graph instance
#graph_layout is an igraph.Layout instance
#title - the network title
#flip is one of the strings 'lr', 'ud' to perform a pseudo-flip effect
#the igraph.Layout is referenced to the screen system of coords, and is upside-down flipped chonging the sign of y-coords
#the global HDN looks better with the left-right flipped layout, by changing the x-coords sign.
#width and height are the sizes of the plot area
graph_layout=np.array(graph_layout)
if flip =='lr':
graph_layout[:, 0] = -graph_layout[:,0]
elif flip == 'ud':
graph_layout[:, 1] = -graph_layout[:,1]
else:
raise ValueError('There is no such a flip type')
m = len(G.vs)
graph_edges = [e.tuple for e in G.es]# represent edges as tuples of end vertex indices
Xn = [graph_layout[k][0] for k in range(m)]#x-coordinates of graph nodes(vertices)
Yn = [graph_layout[k][1] for k in range(m)]#y-coordinates -"-
Xe = []#list of edge ends x-coordinates
Ye = []#list of edge ends y-coordinates
for e in graph_edges:
Xe.extend([graph_layout[e[0]][0],graph_layout[e[1]][0], None])
Ye.extend([graph_layout[e[0]][1],graph_layout[e[1]][1], None])
size = [vertex['size'] for vertex in G.vs]
#define the Plotly graphical objects
plotly_edges = go.Scatter(x=Xe,
y=Ye,
mode='lines',
line=dict(color='rgb(180,180,180)', width=0.75),
hoverinfo='none'
)
plotly_vertices = go.Scatter(x=Xn,
y=Yn,
mode='markers',
name='',
marker=dict(symbol='circle-dot',
size=size,
color=[vertex['color'] for vertex in G.vs],
line=dict(color='rgb(50,50,50)', width=0.5)
),
text=[f"{vertex['label']}
({vertex['disclass']})" for vertex in G.vs],
hoverinfo='text'
)
#Define the Plotly plot layout:
plotly_plot_layout = dict(title=title,
width=width,
height=height,
showlegend=False,
xaxis=dict(visible=False),
yaxis=dict(visible=False),
margin=dict(t=100),
hovermode='closest',
template='none',
paper_bgcolor='rgb(235,235,235)'
)
if source is not None:
annotations = [dict(showarrow=False,
text=source,
xref='paper',
yref='paper',
x=0,
y=-0.1,
xanchor='left',
yanchor='bottom',
font=dict(size=14))]
else:
annotations = []
disorder_name = [vertex['label'] for vertex in G.vs]
for k, s in enumerate(size):
if s>= 20:
annotations.append(dict(text=disorder_name[k],
x=graph_layout[k][0], y=graph_layout[k][1],
xref='x1', yref='y1',
font=dict(color='rgb(50,50,50)', size=10),
showarrow=False
)
)
plotly_plot_layout.update(annotations=annotations)
return go.FigureWidget(data=[plotly_edges, plotly_vertices], layout=plotly_plot_layout)
# Since the position of each node is recorded as a string, we convert it to a tuple of floats:
# In[28]:
pos = [ast.literal_eval(v['position']) for v in K.vs]
HDN_layout = np.array(pos)
# In[39]:
fig = get_Plotly_netw(K, HDN_layout)
#fig
# In[40]:
iplot(fig, filename='Diseasome-Network')
# This network is posted on [Wikipedia](https://en.wikipedia.org/wiki/Human_disease_network).
# In the study of this network we could be interested in the subgraph of a disorder class or a disorder class and its
# neighbors (adjacent nodes).
# First, we define and plot the subgraph of a disorder class.
# The function `extract_subgraph` returns the subgraph generated by the subset of vertices in a disorder class:
# In[30]:
def extract_subgraph(G, disclass='Cancer'):
sbgraph_vertices = [v for v in G.vs if v['disclass']==disclass]
return G.subgraph(sbgraph_vertices)
# In[31]:
def subgraph_fig(G, disorder_class, disorder_classes=disorder_classes,
G_layout=HDN_layout, width=800, height=700):
#generate a plotly figure of the `disclass` subgraph
if disorder_class not in disorder_classes:
raise ValueError("There is no such a disorder class")
S = extract_subgraph(G, disclass=disorder_class)
sbgraph_verts_idx = [int(v['id']) for v in S.vs]#extract the S-vertex indexes in the global graph G
sbgraph_layout = np.array(G_layout)[sbgraph_verts_idx]# extract the layout of the subgraph
figure = get_Plotly_netw(S, sbgraph_layout, title=disorder_class+' subnetwork',
source=None, width=width, height=height)
return S, figure
# The largest disorder class seems to be the class ``Cancer``
# In[40]:
C, figc = subgraph_fig(K, 'Cancer')
#figc
# In[26]:
iplot(figc, filename='Cancer-subnetwork')
# With this default layout (set in the gml data file), the Cancer subnetwork has cluttered edges, and we cannot identify visually
# all pairs of connected disorders. After experimenting with different (i)graph layouts we concluded that the best choice to reduce clutter is the Reingold-Tilford layout:
# In[41]:
new_layout = C.layout('rt')
figc1 = get_Plotly_netw(C, new_layout, title='Cancer subnetwork with Reingold-Tilford layout',
source=None, flip='ud', width=900, height=700)
#figc1
# In[27]:
iplot(figc1, filename='Cancer-subnetwork-rtlayt')
# In[42]:
N, fign=subgraph_fig(K, 'Neurological')
#fign
# In[28]:
iplot(fign, filename='Neurological-subnetwork')
# Let us define a subgraph generated by the vertices in a disorder class and its neighbors, in order to get new insights into
# that class, i.e.
# to identify disorders that share a genetic component with those in the current class:
# In[37]:
def adjacent_subgraph(G, disorder_class='Neurological', disorder_classes=disorder_classes):
if disorder_class not in disorder_classes:
raise ValueError("There is no such a disorder class")
Ed = [e.tuple for e in G.es]
adjacent_edges = [e for e in Ed if G.vs[e[0]]['disclass']==disorder_class
or K.vs[e[1]]['disclass']==disorder_class]
vertsa = []
for e in adjacent_edges:
vertsa.extend([e[0], e[1]])
vertsa_idx = list(set(vertsa))
verts = [G.vs[i] for i in vertsa_idx]
S = K.subgraph(verts)
idxes = [int(v['id']) for v in S.vs]
layt = np.array(HDN_layout)[idxes]
return get_Plotly_netw(S, layt, title=f'Subnetwork of the {disorder_class} class, and its neighbors',
source=None, flip='lr', width=800, height=700)
# In[43]:
figna = adjacent_subgraph(K, disorder_class='Neurological')
#figna
# In[29]:
iplot(figna, filename='Neurological-adjacentN')
# In[9]:
from IPython.core.display import HTML
def css_styling():
styles = open("./custom.css", "r").read()
return HTML(styles)
css_styling()