#!/usr/bin/env python
# coding: utf-8

# ## Pathfinding
#
# Different ontologies exhibit different degrees of latticeyness. Highly latticed
# ontologies will have a combinatorial explosion of paths to a root node.
#
# This notebook has an analysis of path counts for the HPO
#

# In[3]:


## We use a Factory object in the ontobio library
from ontobio import OntologyFactory


# In[5]:


## Get the HPO using default method (currently OntoBee SPARQL)
## This may take 5-10s the first time you run it; afterwards it is cached
ofa = OntologyFactory()
ont = ofa.create('hp')


# In[9]:


## The OWL version of HPO (used here) has many interesting relationship types;
## for now we just care about is-a (subClassOf between named classes)
ont = ont.subontology(relations='subClassOf')


# In[13]:


## Get the root of the abnormality subset
## (the list-unpacking form fails loudly unless the search returns exactly one hit)
[root] = ont.search('Phenotypic abnormality')
root


# In[15]:


## Arbitrary term
[t] = ont.search('Clinodactyly of the 3rd finger')
t


# In[18]:


## We use the standard python networkx library for pathfinding here
## This is easily extracted from an ontology object
## (`import networkx as nx` is the supported spelling; `from networkx import nx`
##  only worked through an accidental re-export in old releases)
import networkx as nx
G = ont.get_graph()
G


# ### Use networkx to find all paths from an arbitrary term
#
# See https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.simple_paths.all_simple_paths.html

# In[21]:


## number of paths
## (for the mapping of networkx to an ontology, source is root, and descendant is target)
## Count while iterating so the full set of paths is never held in memory at once
sum(1 for _ in nx.all_simple_paths(G, root, t))


# In[22]:


## nx yields the paths one at a time; each path is a list of node ids
## Examine the first 2 (islice stops the enumeration after two paths)
from itertools import islice
list(islice(nx.all_simple_paths(G, root, t), 2))


# ## We (heart) pandas
#
# Pandas are cute.
#
# We use a DataFrame object, which we will construct by making a table of terms plus their pathstats

# In[45]:


def get_pathstats(nodes):
    """
    Build one table row of path statistics for each of the given nodes.

    For every node, all simple paths from the notebook-level ``root`` to that
    node are enumerated in the notebook-level graph ``G``.

    Args:
        nodes: iterable of node ids to analyse.

    Returns:
        A list of dicts, one per node and in input order, with the keys
        ``id``, ``label`` (from ``ont.label``), ``pathcount`` (number of
        simple paths) and ``longest`` (number of nodes on the longest path).
        A node with no path from ``root`` gets ``pathcount`` 0 and
        ``longest`` 0 rather than raising.
    """
    items = []
    for n in nodes:
        # Stream the paths: only the running count and maximum length are
        # needed, so the (potentially huge) list of paths is never stored.
        pathcount = 0
        longest = 0
        for path in nx.all_simple_paths(G, root, n):
            pathcount += 1
            if len(path) > longest:
                longest = len(path)
        items.append({'id': n,
                      'label': ont.label(n),
                      'pathcount': pathcount,
                      'longest': longest})
    return items

## Test it out
sample = list(ont.descendants(root))[0:20]
items = get_pathstats(sample)
items[0:3]


# In[46]:


## Look at same table in pandas
import pandas as pd
df = pd.DataFrame(items)
df


# In[49]:


## Basic aggregate stats (over our small sample, which may not be representative)
df['pathcount'].mean()


# ### Plotting with plotly
#
# Let's do a simple barchart showing distribution of pathcounts for our sample

# In[50]:


## NOTE(review): newer plotly releases moved `plotly.plotly` into the separate
## `chart_studio` package -- confirm which plotly version this notebook targets
import plotly.plotly as py
import plotly.graph_objs as go


# In[51]:


data = [
    go.Bar(
        x=df['label'],  # x axis: the term labels
        y=df['pathcount']  # y axis: number of paths from the root to each term
    )
]

# IPython notebook
py.iplot(data, filename='pandas-bar-chart')

# use this in non-notebook context
# url = py.plot(data, filename='pandas-bar-chart')


# ## Summarizing over whole ontology
#
# __warning__ this can take over an hour, if running interactively, be patient!
#
# __help wanted__ is there a way to make Jupyter show a progress bar for cases like this?
#

# In[52]:


## Path statistics for every descendant of the root (slow: see warning above)
sample = list(ont.descendants(root))
items = get_pathstats(sample)
items[0:3]


# In[53]:


len(items)


# In[54]:


df = pd.DataFrame(items)


# In[55]:


df['pathcount'].mean()


# In[56]:


df['pathcount'].max()


# ### Plotting all HP terms
#

# In[57]:


data = [
    go.Bar(
        x=df['label'],  # x axis: the term labels
        y=df['pathcount']  # y axis: number of paths from the root to each term
    )
]

# IPython notebook
py.iplot(data, filename='pandas-bar-chart-all')


# In[59]:


data = [
    go.Scatter(
        x=df['longest'],  # x axis: length of the longest path to each term
        y=df['pathcount'],  # y axis: number of paths to each term
        mode='markers'
    )
]

# IPython notebook
py.iplot(data, filename='pandas-longest-vs-numpaths')


# In[61]:


## Ids of every term that ties for the highest path count
max_num_paths = df['pathcount'].max()
nodes_with_max = [x['id'] for x in items if x['pathcount'] == max_num_paths]
nodes_with_max


# In[62]:


[ont.label(n) for n in nodes_with_max]


# In[70]:


len(nodes_with_max)


# In[71]:


## Pick an arbitrary term from list
t = nodes_with_max[0]


# In[77]:


## Ancestors of the term (including the term itself), restricted to HP ids
ancs = ont.ancestors(t, reflexive=True)
ancs = [a for a in ancs if a.startswith('HP:')]
len(ancs)


# In[80]:


## Make a sub-ontology with just term and ancestors
subont = ont.subontology(ancs)


# In[83]:


## Only one path is needed, so take the first one the generator yields instead
## of enumerating every path of the term that has the most paths
sample_path = next(nx.all_simple_paths(G, root, t))
sample_path


# In[84]:


## Render the sub-ontology,
## highlighting a sample path
import os
from ontobio.io.ontol_renderers import GraphRenderer

## Make sure the target directory exists before the renderer writes into it
os.makedirs('output', exist_ok=True)
w = GraphRenderer.create('png')
w.outfile = 'output/multipath.png'
w.write(subont, query_ids=sample_path)


# ![img](output/multipath.png)

# In[ ]: