import pandas
from neo4j.v1 import GraphDatabase
import hetio.readwrite
import hetio.neo4j
import hetio.pathtools
from hetmech.degree_weight import dwwc
from hetio.matrix import get_node_to_position
# Hetionet v1.0 JSON; the URL pins a specific commit hash of the repository.
url = (
    'https://github.com/dhimmel/hetionet/raw/'
    '76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/'
    'hetnet/json/hetionet-v1.0.json.bz2'
)
graph = hetio.readwrite.read_graph(url)
metagraph = graph.metagraph

# Compound/disease identifiers and the damping exponent shared by the
# matrix, Neo4j and path-enumeration computations below.
compound = 'DB00050'
disease = 'DOID:0050425'
damping_exponent = 0.4

# CbGeAlD does not contain duplicate nodes, so DWWC is equivalent to DWPC
metapath = metagraph.metapath_from_abbrev('CbGeAlD')
%%time
# Run the matrix computation twice over the same graph and metapath:
# the damping=0 result is used below as the path-count matrix, the
# damping=damping_exponent result as the DWWC matrix.
# NOTE(review): rows/cols are rebound by the second call; presumably both
# calls return the same labels since graph and metapath are unchanged.
rows, cols, CbGeAlD_pc = dwwc(graph, metapath, damping=0)
rows, cols, CbGeAlD_dwwc = dwwc(graph, metapath, damping=damping_exponent)
CPU times: user 6.96 s, sys: 1.42 s, total: 8.38 s Wall time: 8.23 s
# Matrix dimensions; rows and columns are looked up by compound and disease
# identifier further below.
CbGeAlD_dwwc.shape
(1552, 137)
# Density: fraction of matrix entries with a nonzero DWWC
CbGeAlD_dwwc.astype(bool).mean()
0.8000460907517496
# Path count matrix (the damping=0 result), cast to integers
CbGeAlD_pc = CbGeAlD_pc.astype(int)
CbGeAlD_pc
array([[ 0, 4, 11, ..., 0, 0, 6], [ 8, 16, 29, ..., 3, 0, 25], [ 0, 4, 11, ..., 0, 0, 6], ..., [ 0, 0, 0, ..., 0, 0, 0], [ 0, 0, 4, ..., 0, 0, 2], [ 5, 24, 46, ..., 5, 0, 19]])
# DWWC matrix (computed with damping=damping_exponent)
CbGeAlD_dwwc
array([[0. , 0.00147838, 0.00330918, ..., 0. , 0. , 0.00799022], [0.00361256, 0.00259965, 0.00223441, ..., 0.00050951, 0. , 0.00339391], [0. , 0.00147838, 0.00330918, ..., 0. , 0. , 0.00799022], ..., [0. , 0. , 0. , ..., 0. , 0. , 0. ], [0. , 0. , 0.00417945, ..., 0. , 0. , 0.0020825 ], [0.00174624, 0.00516953, 0.00621809, ..., 0.00277799, 0. , 0.00517235]])
# Row index of the compound and column index of the disease in the matrices
i = rows.index(compound)
j = cols.index(disease)
# Path count for this compound–disease pair
CbGeAlD_pc[i, j]
4
# Degree-weighted walk count (DWWC) for the same compound–disease pair
CbGeAlD_dwwc[i, j]
0.0014783782118662787
# Build the Cypher query that returns the path count (PC) and DWPC for this
# metapath, matching the source and target nodes on their `identifier` property.
query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier')
print(query)
MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:EXPRESSES_AeG]-(n2)-[:LOCALIZES_DlA]-(n3:Disease) USING JOIN ON n1 WHERE n0.identifier = { source } AND n3.identifier = { target } WITH [ size((n0)-[:BINDS_CbG]-()), size(()-[:BINDS_CbG]-(n1)), size((n1)-[:EXPRESSES_AeG]-()), size(()-[:EXPRESSES_AeG]-(n2)), size((n2)-[:LOCALIZES_DlA]-()), size(()-[:LOCALIZES_DlA]-(n3)) ] AS degrees, path RETURN count(path) AS PC, sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC
# Cross-check the matrix result by running the DWPC Cypher query for the same
# compound–disease pair against the Neo4j server at neo4j.het.io.
driver = GraphDatabase.driver("bolt://neo4j.het.io")
# Keys must match the { source }, { target } and { w } placeholders in the query.
params = {
    'source': compound,
    'target': disease,
    'w': damping_exponent,
}
# NOTE(review): the driver is never closed here; call driver.close() once no
# later cell needs the connection.
with driver.session() as session:
    result = session.run(query, params)
    # The query returns one aggregate row (PC, DWPC); read it while the
    # session is still open.
    result = result.single()
result
<Record PC=4 DWPC=0.0014783782118662789>
# Enumerate the CbGeAlD paths between the compound and the disease directly
# on the in-memory graph. graph.node_dict is keyed here by
# (node type, identifier) tuples.
compound_id = ('Compound', 'DB00050')
disease_id = ('Disease', 'DOID:0050425')
# NOTE(review): duplicates=True presumably keeps paths that revisit a node;
# confirm against hetio.pathtools.paths_between.
paths = hetio.pathtools.paths_between(
    graph,
    source=graph.node_dict[compound_id],
    target=graph.node_dict[disease_id],
    metapath=metapath,
    duplicates=True,
)
paths
[Compound::DB00050 - binds - Gene::2798 - expresses - Anatomy::UBERON:0001016 - localizes - Disease::DOID:0050425, Compound::DB00050 - binds - Gene::2798 - expresses - Anatomy::UBERON:0000955 - localizes - Disease::DOID:0050425, Compound::DB00050 - binds - Gene::2798 - expresses - Anatomy::UBERON:0002298 - localizes - Disease::DOID:0050425, Compound::DB00050 - binds - Gene::2798 - expresses - Anatomy::UBERON:0001017 - localizes - Disease::DOID:0050425]
# Path count: number of explicitly enumerated paths
len(paths)
4
# DWPC computed from the enumerated paths (equivalent to the DWWC here,
# since CbGeAlD does not contain duplicate nodes)
hetio.pathtools.DWPC(paths, damping_exponent=damping_exponent)
0.001478378211866279