#!/usr/bin/env python
# coding: utf-8

# # Gene Expression Simple Demo
#
# This shows how to query BgeeDb gene expression data ingested in Monarch

# In[1]:


## Create an ontology factory in order to fetch Uberon
from ontobio.ontol_factory import OntologyFactory
ofactory = OntologyFactory()
ont = ofactory.create("uberon")


# In[6]:


## Create a sub-ontology that excludes all relations other than is-a and part-of
subont = ont.subontology(relations=['subClassOf', 'BFO:0000050'])


# In[7]:


## Create an association factory to get mouse gene-expression associations (sourced from bgeedb)
from ontobio.assoc_factory import AssociationSetFactory
afactory = AssociationSetFactory()
aset = afactory.create(ontology=subont,
                       subject_category='gene',
                       object_category='anatomy',
                       taxon='NCBITaxon:10090')


# In[78]:


# show first 5
# (bare expression: displayed by the notebook, a no-op when run as a script)
["{} '{}'".format(g, aset.label(g)) for g in aset.subjects[:5]]


# In[11]:


# fetch uberon term
# The single-element unpacking raises ValueError unless the search returns
# exactly one match.
[liver] = ont.search('liver')
liver


# In[13]:


liver_genes = aset.query([liver])
["{} '{}'".format(g, aset.label(g)) for g in liver_genes]


# In[36]:


## NOTE: we currently lack rank scores, see https://github.com/monarch-initiative/monarch-app/issues/1271
## For now let's do something naive

def specificity_score(g, t):
    """
    Naive specificity score - penalize for every expression *not* in desired term, e.g. liver

    An annotation `a` of gene `g` is counted as non-specific unless one of
    the following holds in `subont` (is-a / part-of only):

    * `a` is the target term `t` itself,
    * `t` is among the ancestors of `a` (`a` lies underneath `t`),
    * `a` is among the ancestors of `t` (`a` is a more general term that
      `t` lies underneath).

    Args:
        g: gene id whose annotations are looked up in `aset`.
        t: target anatomy term id, e.g. the Uberon id for liver.

    Returns:
        1 / (number of non-specific annotations + 1); 1.0 means that no
        annotation of `g` falls outside the lineage of `t`.
    """
    # Loop-invariant: look up the target's ancestors once.
    # NOTE(review): this clause previously tested `subont.ancestors(g)`, i.e.
    # the ancestors of the *gene* id, which is not an anatomy term and so could
    # not exclude anything. `t` looks like the intended argument - confirm.
    t_ancestors = subont.ancestors(t)
    anns = aset.annotations(g)
    nonspecific = [a for a in anns
                   if t != a
                   and t not in subont.ancestors(a)
                   and a not in t_ancestors]
    return 1/(len(nonspecific)+1)

## Tuples of (gene_id, gene_symbol, score)
gscores = [(g,aset.label(g),specificity_score(g,liver)) for g in liver_genes]
gscores


# In[38]:


# highest score (most liver-specific) first
sorted(gscores, key=lambda x: -x[2])


# In[40]:


# a score of exactly 1.0 means zero non-specific annotations
only_in_liver = [x for x in gscores if x[2] == 1.0]
only_in_liver


# In[51]:


## get phenotype associations
mp = ofactory.create("mp")
pheno_aset = afactory.create(ontology=mp,
                             subject_category='gene',
                             object_category='phenotype',
                             taxon='NCBITaxon:10090')


# In[77]:


## Show phenotype anns for all genes in liver
for g in liver_genes:
    anns = pheno_aset.annotations(g)
    print("{} {} {}".format(g,aset.label(g), [(a,mp.label(a)) for a in anns]))


# ## More advanced expression analyses
#
# For more advanced analyses, see http://bgee.unil.ch

# In[ ]: