This notebook demonstrates how to query associations from IMPC, including the evidence and provenance modeled with SEPIO.
To set up:
pip install dipper jupyter ipython
from dipper.graph.RDFGraph import RDFGraph
# Location of the IMPC Turtle file to load.
impc_graph = "https://data.monarchinitiative.org/ttl/impc.ttl"
graph = RDFGraph()

# Fetch and parse the RDF file; expect this to take a minute or two.
graph.parse(source=impc_graph, format="turtle")
<Graph identifier=https://data.monarchinitiative.org/ttl/impc.ttl (<class 'rdflib.graph.Graph'>)>
# Count the subjects that carry a has_phenotype (RO:0002200) relation.
has_phenotype = graph._getNode("RO:0002200")
sum(1 for _ in graph.subjects(predicate=has_phenotype))
19843
# Count the nodes typed as OBAN associations in the graph.
from rdflib.namespace import RDF

association = graph._getNode("OBAN:association")
sum(1 for _ in graph.subjects(predicate=RDF.type, object=association))
19843
# Count the lines of mutant phenotype evidence (nodes typed ECO:0000015).
mut_pheno_evidence = graph._getNode("ECO:0000015")
sum(1 for _ in graph.subjects(predicate=RDF.type, object=mut_pheno_evidence))
23518
Given a list of genotype labels and phenotype IDs, create a subgraph containing the 'has phenotype' relation and all of its supporting evidence.
# Bind all namespaces in curie_map so the SPARQL below can use CURIE prefixes
# (RO:, OBAN:, SEPIO:, ECO:, ...).
graph.bind_all_namespaces()

# The genotype is matched by its rdfs:label, the phenotype by its CURIE.
genotype = "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)"
phenotype = "MP:0001399"

# Graph pattern for one genotype/phenotype pair: the has-phenotype triple, the
# association that reifies it, and everything hanging off the variables named
# ?assertion, ?creator, ?evidenceline, ?measure1 and ?study.
# {0} is the genotype label, {1} the phenotype CURIE.
# NOTE(review): the label is pasted into a single-quoted SPARQL literal without
# escaping, so a label containing a quote or backslash would break the query.
query = """
?genotype rdfs:label '{0}' .
?genotype RO:0002200 {1} .
?assoc a ?assoc_type ;
OBAN:association_has_object {1} ;
OBAN:association_has_predicate RO:0002200 ;
OBAN:association_has_subject ?genotype ;
RO:0002558 ECO:0000015 ;
SEPIO:0000007 ?evidenceline ;
SEPIO:0000015 ?assertion .
?assertion SEPIO:0000018 ?creator ;
SEPIO:0000111 ?evidenceline .
?assertion a ?assertion_type .
?creator ?creator_predicates ?creator_objects .
?evidenceline SEPIO:0000084 ?measure1 ;
SEPIO:0000085 ?study .
?evidenceline a ?ev_type .
?measure1 ?measure_predicates ?measure_objects .
?study ?study_predicates ?study_objects .
?study_objects ?stud_p ?stud_o .
?measure_objects ?meas_p ?meas_o .
""".format(genotype, phenotype)

# The same pattern serves as CONSTRUCT template and WHERE clause, so every
# matched triple is emitted. Doubled braces are literal braces for str.format.
sparql_query = """
CONSTRUCT {{
{0}
}}
WHERE {{
{0}
}}
""".format(query)

sparql_output = graph.query(sparql_query)

# Copy the constructed triples into a fresh graph.
subGraph = RDFGraph()
for triple in sparql_output:
    subGraph.add(triple)

subGraph.bind("OBAN", "http://purl.org/oban/")

# serialize() without a destination returns bytes on rdflib < 6 and str on
# rdflib >= 6; only decode when bytes were returned so both versions work.
turtle = subGraph.serialize(format='turtle')
if isinstance(turtle, bytes):
    turtle = turtle.decode("utf-8")
print(turtle)
@prefix OBAN: <http://purl.org/oban/> . @prefix OBO: <http://purl.obolibrary.org/obo/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . <https://monarchinitiative.org/MONARCH_64baf3a9fd766d9b45b8706afc8f6e43b8a007d0> a OBAN:association ; OBO:RO_0002558 OBO:ECO_0000015 ; OBO:SEPIO_0000007 <https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528>, <https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> ; OBO:SEPIO_0000015 <https://monarchinitiative.org/.well-known/genid/e0339fd4e780575854a9b15a9f53440b50aa9aa7> ; OBAN:association_has_object OBO:MP_0001399 ; OBAN:association_has_predicate OBO:RO_0002200 ; OBAN:association_has_subject <https://monarchinitiative.org/MONARCH_20c701bb7b16f52735288b5bf85023bd71669aca> . <http://www.mousephenotype.org/> a <http://xmlns.com/foaf/0.1/organization> ; rdfs:label "International Mouse Phenotyping Consortium" . <https://monarchinitiative.org/.well-known/genid/04e4445c9ae9486fb28c44e40ff01f877e16884a> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> . <https://monarchinitiative.org/.well-known/genid/370c861b680c514c26890e9811e88f230b25f7ad> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> . <https://monarchinitiative.org/.well-known/genid/52b640304090c1033aca385fd3e90dbdc18ca0cb> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> . <https://monarchinitiative.org/.well-known/genid/5f238f0d997802a81db5d41410c9382b3f9d7bff> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> . 
<https://monarchinitiative.org/.well-known/genid/dce47fd7382541f3202224331d338fb62a8564f1> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> . <https://monarchinitiative.org/.well-known/genid/e0339fd4e780575854a9b15a9f53440b50aa9aa7> a OBO:SEPIO_0000001 ; OBO:SEPIO_0000018 <http://www.mousephenotype.org/> ; OBO:SEPIO_0000111 <https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528>, <https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> . <https://monarchinitiative.org/.well-known/genid/e65e5afb7c929931d707b90944c49e8a5b2ef2f6> OBO:RO_0002353 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> . <https://monarchinitiative.org/MONARCH_20c701bb7b16f52735288b5bf85023bd71669aca> rdfs:label "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)" ; OBO:RO_0002200 OBO:MP_0001399 . <https://www.mousephenotype.org/impress/parameterontologies/330/10> a <http://www.w3.org/2002/07/owl#NamedIndividual> ; rdfs:label "Whole arena resting time (Open-field)" . <https://www.mousephenotype.org/impress/parameterontologies/359/11> a <http://www.w3.org/2002/07/owl#NamedIndividual> ; rdfs:label "Locomotor activity (Modified SHIRPA)" . <https://www.mousephenotype.org/impress/protocol/10/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ; rdfs:label "Open-field" . <https://www.mousephenotype.org/impress/protocol/11/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ; rdfs:label "Modified SHIRPA" . <http://www.eumodic.org/> a <http://vivoweb.org/ontology/core#Project> ; rdfs:label "European Mouse Disease Clinic" . <http://www.sanger.ac.uk/> a <http://xmlns.com/foaf/0.1/organization> ; rdfs:label "WTSI" . 
<https://monarchinitiative.org/.well-known/genid/4c55b8c702eaf8199402e1fca28aab43664e2528> a OBO:ECO_0000015 ; OBO:SEPIO_0000084 <https://monarchinitiative.org/.well-known/genid/04e4445c9ae9486fb28c44e40ff01f877e16884a>, <https://monarchinitiative.org/.well-known/genid/52b640304090c1033aca385fd3e90dbdc18ca0cb>, <https://monarchinitiative.org/.well-known/genid/5f238f0d997802a81db5d41410c9382b3f9d7bff> ; OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> . <https://monarchinitiative.org/.well-known/genid/b8a67608e0359789f5a49f8cf43f2e427d80926a> a OBO:ECO_0000015 ; OBO:SEPIO_0000084 <https://monarchinitiative.org/.well-known/genid/370c861b680c514c26890e9811e88f230b25f7ad>, <https://monarchinitiative.org/.well-known/genid/dce47fd7382541f3202224331d338fb62a8564f1>, <https://monarchinitiative.org/.well-known/genid/e65e5afb7c929931d707b90944c49e8a5b2ef2f6> ; OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> . <https://www.mousephenotype.org/impress/procedures/2> a <http://www.w3.org/2002/07/owl#NamedIndividual> ; rdfs:label "EUMODIC Pipeline 2" . <https://monarchinitiative.org/.well-known/genid/d81d7bea010161bf3af538ddfaa95eca27bef5c1> a OBO:OBI_0000471 ; OBO:BFO_0000050 <http://www.eumodic.org/>, <https://www.mousephenotype.org/impress/procedures/2> ; OBO:BFO_0000051 OBO:STATO_0000372, <https://www.mousephenotype.org/impress/protocol/10/2> ; OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> ; OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/330/10> . 
<https://monarchinitiative.org/.well-known/genid/f9abfb75efe33f00ddbdc60be74c77d32e4f2cb1> a OBO:OBI_0000471 ; OBO:BFO_0000050 <http://www.eumodic.org/>, <https://www.mousephenotype.org/impress/procedures/2> ; OBO:BFO_0000051 OBO:STATO_0000372, <https://www.mousephenotype.org/impress/protocol/11/2> ; OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> ; OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/359/11> .
# Turn this into a function
#
# Reusable, unformatted version of the graph pattern: {0} is filled with a
# genotype label and {1} with a phenotype CURIE via template.format(...).
template = """
?genotype rdfs:label '{0}' .
?genotype RO:0002200 {1} .
?assoc a ?assoc_type ;
OBAN:association_has_object {1} ;
OBAN:association_has_predicate RO:0002200 ;
OBAN:association_has_subject ?genotype ;
RO:0002558 ECO:0000015 ;
SEPIO:0000007 ?evidenceline ;
SEPIO:0000015 ?assertion .
?assertion SEPIO:0000018 ?creator ;
SEPIO:0000111 ?evidenceline .
?assertion a ?assertion_type .
?creator ?creator_predicates ?creator_objects .
?evidenceline SEPIO:0000084 ?measure1 ;
SEPIO:0000085 ?study .
?evidenceline a ?ev_type .
?measure1 ?measure_predicates ?measure_objects .
?study ?study_predicates ?study_objects .
?study_objects ?stud_p ?stud_o .
?measure_objects ?meas_p ?meas_o .
"""
# Fresh target graph that collects the triples for every genotype/phenotype pair.
subGraph = RDFGraph()
# Register the OBAN prefix on the target graph.
subGraph.bind("OBAN", "http://purl.org/oban/")
def create_subgraph(query, graph, new_graph):
    """Copy the triples matched by a SPARQL graph pattern into another graph.

    The pattern is used both as the CONSTRUCT template and as the WHERE
    clause, so every triple taking part in a match is emitted.

    :param query: SPARQL graph pattern (triple patterns only, without the
        enclosing braces).
    :param graph: source graph; must provide ``query(sparql_string)`` whose
        result iterates over triples.
    :param new_graph: target graph; each triple is passed to its ``add``
        method, so it is mutated in place.
    :return: ``new_graph``, so calls can be chained.
    """
    # Doubled braces are literal braces for str.format; {0} is the pattern.
    sparql_query = "\nCONSTRUCT {{\n{0}\n}}\nWHERE {{\n{0}\n}}\n".format(query)
    for triple in graph.query(sparql_query):
        new_graph.add(triple)
    return new_graph
# Pairs of [genotype label, phenotype CURIE] to pull into the subgraph.
g2p_list = [
    [
        "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (female)",
        "MP:0001399",
    ],
    [
        "Ankrd13a<Gt(RRH308)Byg>/Ankrd13a<Gt(RRH308)Byg> [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a<Gt(RRH308)Byg>] (male)",
        "MP:0001399",
    ],
    [
        "Mapkap1<tm1b(EUCOMM)Wtsi>/Mapkap1<+> [C57BL/6N-BCM-Mapkap1 EPD0609_2_F05-B] (female)",
        "MP:0002753",
    ],
    [
        "Hbs1l<tm1a(KOMP)Wtsi>/Hbs1l<tm1a(KOMP)Wtsi> [C57BL/6N-WTSI-METC] (female)",
        "MP:0005292",
    ],
    [
        "Gnao1<tm1b(EUCOMM)Hmgu>/Gnao1<+> [C57BL/6NTac-MRC Harwell-H-GNAO1-G05-TM1B] (male)",
        "MP:0001399",
    ],
]

# Fill the template for each pair and accumulate the matches in subGraph.
for g2p in g2p_list:
    query = template.format(*g2p)
    create_subgraph(query, graph, subGraph)

# Write the combined subgraph to disk as Turtle.
# NOTE(review): absolute, user-specific output path; adjust before running
# on another machine.
subGraph.serialize("/home/kshefchek/impc_test.ttl", format="ttl")