A quick tutorial on working with the dipper model API. As input we have a small dataset of variant–phenotype associations, each with a reference (source publication), an evidence code, and a relation.
Note: to run this notebook, you must first install dipper in your virtualenv: python3 setup.py install
from dipper.graph.StreamedGraph import StreamedGraph
from dipper.graph.RDFGraph import RDFGraph
from dipper.models.Model import Model
from dipper.models.assoc.Association import Assoc as Association
import pandas as pd
# One example record, written as field -> value so each value sits next
# to the column it belongs to. Dict insertion order fixes the column order.
example_record = {
    'variant': 'ClinVarVariant:254143',
    'variant_label': 'C326F',
    'variant_type': 'SO:0000694',
    'phenotype': 'HP:0000748',
    'relation': 'RO:0002200',
    'source': 'PMID:12503095',
    'evidence': 'ECO:0000220',
    'dbxref': 'dbSNP:886037891',
}

# Column names for the dataframe, in order
columns = list(example_record)

# Rows of the dataset (a single row here), values aligned with `columns`
data = [list(example_record.values())]
# Initialize the graph. Here we demo RDFGraph,
# which is a subclass of rdflib's Graph.
graph = RDFGraph()

# Our model class writes to the graph and takes
# a graph object as its only instance variable
model = Model(graph)

# Create a pandas dataframe
dataframe = pd.DataFrame(data=data, columns=columns)

for index, row in dataframe.iterrows():
    # Add the triple ClinVarVariant:254143 RO:0002200 HP:0000748
    # RO:0002200 is the has_phenotype relation
    # HP:0000748 is the phenotype 'Inappropriate laughter', haha
    model.addTriple(row['variant'], row['relation'], row['phenotype'])

    # The addLabel method adds a label using the rdfs:label relation
    model.addLabel(row['variant'], row['variant_label'])

    # addType makes the variant an individual of a class,
    # in this case SO:0000694 'SNP'
    model.addType(row['variant'], row['variant_type'])

    # addXref uses the relation OIO:hasDbXref
    model.addXref(row['variant'], row['dbxref'])

# Serialize the graph as turtle.
# Older rdflib releases return bytes from serialize(), while rdflib >= 6
# returns str when no destination/encoding is given; decode only when
# we actually got bytes so the cell runs on either version.
turtle_output = graph.serialize(format='turtle')
if isinstance(turtle_output, bytes):
    turtle_output = turtle_output.decode("utf-8")
print(turtle_output)
@prefix ClinVarVariant: <http://www.ncbi.nlm.nih.gov/clinvar/variation/> . @prefix HP: <http://purl.obolibrary.org/obo/HP_> . @prefix OBO: <http://purl.obolibrary.org/obo/> . @prefix OIO: <http://www.geneontology.org/formats/oboInOwl#> . @prefix RO: <http://purl.obolibrary.org/obo/RO_> . @prefix SO: <http://purl.obolibrary.org/obo/SO_> . @prefix dbSNP: <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . <http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> a OBO:SO_0000694 ; rdfs:label "C326F" ; OBO:RO_0002200 OBO:HP_0000748 ; OIO:hasDbXref <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891> .
# Build an association between the variant and its phenotype, then attach
# the source (publication) and the evidence (ECO code) to it.
# `row` is the loop variable left over from iterating the dataframe,
# i.e. the last (here: only) row of the dataset.
subject_id = row['variant']
object_id = row['phenotype']
predicate_id = row['relation']

association = Association(
    graph, 'test_source', subject_id, object_id, predicate_id)
association.add_source(row['source'])
association.add_evidence(row['evidence'])

# Let's inspect the attributes of the association object
vars(association)
{'assoc_id': None, 'definedby': 'test_source', 'description': None, 'evidence': ['ECO:0000220'], 'graph': <Graph identifier=Ne4f18e2b8550490ca9fc59667b4e8f68 (<class 'dipper.graph.RDFGraph.RDFGraph'>)>, 'model': <dipper.models.Model.Model at 0x7f5abc55b208>, 'obj': 'HP:0000748', 'provenance': [], 'rel': 'RO:0002200', 'score': None, 'score_type': None, 'score_unit': None, 'source': ['PMID:12503095'], 'sub': 'ClinVarVariant:254143'}
# After the association object is composed, add it to the RDF graph.
# This generates an association ID by hashing its attributes (if not manually set),
# and writes triples to the rdf graph, linking the subject, predicate, and object
# using the OBAN reification model - https://github.com/EBISPOT/OBAN
association.add_association_to_graph()

# Serialize the graph. When using RDFGraphs, see
# http://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.graph.Graph.serialize
# for a list of serialization options.
# Older rdflib releases return bytes from serialize(), while rdflib >= 6
# returns str when no destination/encoding is given; decode only when
# we actually got bytes so the cell runs on either version.
turtle_output = graph.serialize(format='turtle')
if isinstance(turtle_output, bytes):
    turtle_output = turtle_output.decode("utf-8")
print(turtle_output)
@prefix ClinVarVariant: <http://www.ncbi.nlm.nih.gov/clinvar/variation/> . @prefix ECO: <http://purl.obolibrary.org/obo/ECO_> . @prefix HP: <http://purl.obolibrary.org/obo/HP_> . @prefix MONARCH: <https://monarchinitiative.org/MONARCH_> . @prefix OBAN: <http://purl.org/oban/> . @prefix OBO: <http://purl.obolibrary.org/obo/> . @prefix OIO: <http://www.geneontology.org/formats/oboInOwl#> . @prefix PMID: <http://www.ncbi.nlm.nih.gov/pubmed/> . @prefix RO: <http://purl.obolibrary.org/obo/RO_> . @prefix SO: <http://purl.obolibrary.org/obo/SO_> . @prefix dbSNP: <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=> . @prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . <https://monarchinitiative.org/MONARCH_9bb0062d3cd43958a817322404a5f0dca52cb82e> a OBAN:association ; OBO:RO_0002558 OBO:ECO_0000220 ; dc:source <http://www.ncbi.nlm.nih.gov/pubmed/12503095> ; OBAN:association_has_object OBO:HP_0000748 ; OBAN:association_has_predicate OBO:RO_0002200 ; OBAN:association_has_subject <http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> . <http://www.ncbi.nlm.nih.gov/clinvar/variation/254143> a OBO:SO_0000694 ; rdfs:label "C326F" ; OBO:RO_0002200 OBO:HP_0000748 ; OIO:hasDbXref <http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891> .
# Serialize the same graph as RDF/XML ('pretty-xml' format).
# Older rdflib releases return bytes from serialize(), while rdflib >= 6
# returns str when no destination/encoding is given; decode only when
# we actually got bytes so the cell runs on either version.
xml_output = graph.serialize(format='pretty-xml')
if isinstance(xml_output, bytes):
    xml_output = xml_output.decode("utf-8")
print(xml_output)
<?xml version="1.0" encoding="utf-8"?> <rdf:RDF xmlns:OIO="http://www.geneontology.org/formats/oboInOwl#" xmlns:OBO="http://purl.obolibrary.org/obo/" xmlns:OBAN="http://purl.org/oban/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" > <OBAN:association rdf:about="https://monarchinitiative.org/MONARCH_9bb0062d3cd43958a817322404a5f0dca52cb82e"> <OBAN:association_has_object rdf:resource="http://purl.obolibrary.org/obo/HP_0000748"/> <dc:source rdf:resource="http://www.ncbi.nlm.nih.gov/pubmed/12503095"/> <OBAN:association_has_predicate rdf:resource="http://purl.obolibrary.org/obo/RO_0002200"/> <OBO:RO_0002558 rdf:resource="http://purl.obolibrary.org/obo/ECO_0000220"/> <OBAN:association_has_subject> <OBO:SO_0000694 rdf:about="http://www.ncbi.nlm.nih.gov/clinvar/variation/254143"> <rdfs:label>C326F</rdfs:label> <OIO:hasDbXref rdf:resource="http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=886037891"/> <OBO:RO_0002200 rdf:resource="http://purl.obolibrary.org/obo/HP_0000748"/> </OBO:SO_0000694> </OBAN:association_has_subject> </OBAN:association> </rdf:RDF>