import pandas
from neo4j.v1 import GraphDatabase
import hetio.readwrite
import hetio.neo4j
import hetio.pathtools
from hetmech.degree_weight import dwwc
from hetio.matrix import get_node_to_position
# Load the Hetionet v1.0 hetnet from a pinned commit of the hetionet repository.
url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
graph = hetio.readwrite.read_graph(url)
metagraph = graph.metagraph
# Source/target node identifiers and damping exponent shared by every
# computation below (Cypher, hetio path enumeration, and the matrix method).
compound = 'DB01156' # Bupropion
disease = 'DOID:0050742' # nicotine dependence
damping_exponent = 0.4
# CbGpPWpGaD contains duplicate metanodes (Gene appears twice), so DWPC is
# not equivalent to DWWC.
# NOTE(review): the original comment read "DWPC is not equivalent to DWPC";
# DWWC (imported above from hetmech.degree_weight) is presumably what was meant.
metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')
# Display the human-readable form of the metapath as the cell output.
metapath.get_unicode_str()
'Compound–binds–Gene–participates–Pathway–participates–Gene–associates–Disease'
%%time
query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier', unique_nodes=True)
print(query)
driver = GraphDatabase.driver("bolt://neo4j.het.io")
params = {
'source': compound,
'target': disease,
'w': damping_exponent,
}
with driver.session() as session:
result = session.run(query, params)
result = result.single()
result
MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:PARTICIPATES_GpPW]-(n2)-[:PARTICIPATES_GpPW]-(n3)-[:ASSOCIATES_DaG]-(n4:Disease) USING JOIN ON n2 WHERE n0.identifier = { source } AND n4.identifier = { target } AND n1 <> n3 WITH [ size((n0)-[:BINDS_CbG]-()), size(()-[:BINDS_CbG]-(n1)), size((n1)-[:PARTICIPATES_GpPW]-()), size(()-[:PARTICIPATES_GpPW]-(n2)), size((n2)-[:PARTICIPATES_GpPW]-()), size(()-[:PARTICIPATES_GpPW]-(n3)), size((n3)-[:ASSOCIATES_DaG]-()), size(()-[:ASSOCIATES_DaG]-(n4)) ] AS degrees, path RETURN count(path) AS PC, sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC CPU times: user 13.3 ms, sys: 4.02 ms, total: 17.3 ms Wall time: 276 ms
# Extract the path count (PC) and DWPC fields from the Cypher result record
# and print them one per line, path count first.
cypher_pc, cypher_dwpc = result['PC'], result['DWPC']
print(cypher_pc)
print(cypher_dwpc)
142 0.03287590886921623
%%time
compound_id = 'Compound', compound
disease_id = 'Disease', disease
hetio_paths = hetio.pathtools.paths_between(
graph,
source=graph.node_dict[compound_id],
target=graph.node_dict[disease_id],
metapath=metapath,
duplicates=False,
)
# Path count
print(len(hetio_paths))
# DWPC
hetio_dwpc = hetio.pathtools.DWPC(hetio_paths, damping_exponent=damping_exponent)
142 CPU times: user 272 ms, sys: 0 ns, total: 272 ms Wall time: 283 ms
# Display the DWPC obtained from the hetio path enumeration as the cell output.
hetio_dwpc
0.03287590886921626
from hetmech.degree_weight import dwpc
def compare_dwpc(output_mat, i, j):
    """Print one DWPC matrix entry next to the hetio and Cypher values.

    Reads the module-level names ``hetio_dwpc`` and ``cypher_dwpc``, which
    must have been assigned before this function is called.

    Args:
        output_mat: Matrix exposing ``.shape`` and ``[i, j]`` indexing.
        i: Row index of the entry to report.
        j: Column index of the entry to report.
    """
    print("\nCOMPARE")
    matrix_shape = output_mat.shape
    print("dwpc_matrix shape {}".format(matrix_shape))
    matrix_value = output_mat[i, j]
    print("dwpc from i to j, as computed here: {}".format(matrix_value))
    print("dwpc from i to j, as computed by hetio: {}".format(hetio_dwpc))
    print("dwpc from i to j, as computed by cypher: {}".format(cypher_dwpc))
def compare_pc(output_mat, i, j):
    """Print one path-count matrix entry next to the hetio and Cypher values.

    Reads the module-level names ``hetio_paths`` (its length is the hetio
    path count) and ``cypher_pc``, which must have been assigned before this
    function is called.

    Args:
        output_mat: Matrix exposing ``.shape`` and ``[i, j]`` indexing.
        i: Row index of the entry to report.
        j: Column index of the entry to report.
    """
    print("\nCOMPARE")
    matrix_shape = output_mat.shape
    print("pc_matrix shape {}".format(matrix_shape))
    matrix_value = output_mat[i, j]
    print("pc from i to j, as computed here: {}".format(matrix_value))
    hetio_pc = len(hetio_paths)
    print("pc from i to j, as computed by hetio: {}".format(hetio_pc))
    print("pc from i to j, as computed by cypher: {}".format(cypher_pc))
# Matrix method with damping=0.0; the result is checked against the path
# counts from hetio and Cypher.
rows, cols, dwpc_matrix, seconds = dwpc(graph, metapath, damping=0.0)
# Locate the compound/disease pair in the matrix row and column labels.
i, j = rows.index(compound), cols.index(disease)
compare_pc(dwpc_matrix, i, j)
# Display the fourth value returned by dwpc (presumably elapsed seconds).
seconds
COMPARE pc_matrix shape (1552, 137) pc from i to j, as computed here: 142.0 pc from i to j, as computed by hetio: 142 pc from i to j, as computed by cypher: 142
33.54819082096219
# Matrix method with the shared damping exponent; the (i, j) entry is checked
# against the DWPC values from hetio and Cypher. `i` and `j` are reused from
# the path-count run above.
rows, cols, dwpc_matrix, seconds = dwpc(
    graph,
    metapath,
    damping=damping_exponent,
)
compare_dwpc(dwpc_matrix, i, j)
# Display the fourth value returned by dwpc (presumably elapsed seconds).
seconds
COMPARE dwpc_matrix shape (1552, 137) dwpc from i to j, as computed here: 0.03287590886921622 dwpc from i to j, as computed by hetio: 0.03287590886921626 dwpc from i to j, as computed by cypher: 0.03287590886921623
34.095333027653396
# Compare the all-pairs cost of the matrix method with an extrapolation of
# the per-pair hetio path enumeration.
# The number of (source, target) pairs is taken from the matrix computed
# above instead of being hard-coded, so it stays correct if the metapath or
# the graph changes.
n_pairs = dwpc_matrix.shape[0] * dwpc_matrix.shape[1]
# NOTE(review): the timing constants below (3.43 + 3.56 s for the matrix
# method, 0.186 s per pair for hetio) are hard-coded and do not match the
# run times recorded elsewhere in this notebook; confirm or replace them
# with measured values before relying on the comparison.
print("Compare time for all-pairs computations via hetio, vs via matrix method")
print("Matrix method total time: {}s for all {} pairs dwpc".format((3.43 + 3.56), n_pairs))
print("hetio method total time (estimated): {:6.0f}s for all {} pairs dwpc".format(.186 * n_pairs, n_pairs))
Compare time for all-pairs computations via hetio, vs via matrix method Matrix method total time: 6.99s for all 212624 pairs dwpc hetio method total time (estimated): 39548s for all 212624 pairs dwpc