## Create an ontology factory and use it to make an ontology object, using the handle 'go'
## This will create an ontology object using remote services
## NOTE: `ont` is the shared ontology object; the annotation lookup, traversal,
## slim-mapping and reporting steps further down all read from it.
from ontobio.ontol_factory import OntologyFactory
ofa = OntologyFactory()
ont = ofa.create('go')
/Users/cjm/repos/go-notebooks/venv/lib/python3.5/site-packages/cachier/mongo_core.py:24: UserWarning: Cachier warning: pymongo was not found. MongoDB cores will not work. "Cachier warning: pymongo was not found. MongoDB cores will not work.")
## Create an association factory to get pombase GO annotations
## (this uses an 'eager' method, where all annotations are fetched from services in advance)
## The association set is bound to `ont` and restricted to gene -> function
## associations for one taxon (NCBITaxon:4896 -- presumably the fission yeast
## taxon served by PomBase; confirm before changing).
from ontobio.assoc_factory import AssociationSetFactory
afactory = AssociationSetFactory()
aset = afactory.create(ontology=ont, subject_category='gene', object_category='function', taxon='NCBITaxon:4896')
## Change this to your gene of interest
CDC2 = 'PomBase:SPBC11B10.09'
## Write id-labels for direct annotations to CDC2
## `direct_anns` is kept in a variable because the traversal, rendering and
## reporting steps that follow reuse it.
direct_anns = aset.annotations(CDC2)
for t in direct_anns:
    ## One line per annotated term: its id plus the label looked up in the ontology
    print(" Annotation: {id} '{label}'".format(id=t, label=ont.label(t)))
Annotation: GO:1903467 'negative regulation of mitotic DNA replication initiation' Annotation: GO:0004693 'cyclin-dependent protein serine/threonine kinase activity' Annotation: GO:0072686 'mitotic spindle' Annotation: GO:0005515 'protein binding' Annotation: GO:0031138 'negative regulation of conjugation with cellular fusion' Annotation: GO:1905785 'negative regulation of anaphase-promoting complex-dependent catabolic process' Annotation: GO:0010389 'regulation of G2/M transition of mitotic cell cycle' Annotation: GO:0005829 'cytosol' Annotation: GO:0031031 'positive regulation of septation initiation signaling' Annotation: GO:0000775 'chromosome, centromeric region' Annotation: GO:1900087 'positive regulation of G1/S transition of mitotic cell cycle' Annotation: GO:0072434 'signal transduction involved in mitotic G2 DNA damage checkpoint' Annotation: GO:0005737 'cytoplasm' Annotation: GO:1904537 'negative regulation of mitotic telomere tethering at nuclear periphery' Annotation: GO:0007089 'traversing start control point of mitotic cell cycle' Annotation: GO:0072435 'response to mitotic G2 DNA damage checkpoint signaling' Annotation: GO:1990820 'response to mitotic DNA integrity checkpoint signaling' Annotation: GO:0010971 'positive regulation of G2/M transition of mitotic cell cycle' Annotation: GO:0097472 'cyclin-dependent protein kinase activity' Annotation: GO:0004672 'protein kinase activity' Annotation: GO:1905168 'positive regulation of double-strand break repair via homologous recombination' Annotation: GO:1990023 'mitotic spindle midzone' Annotation: GO:1903465 'positive regulation of mitotic cell cycle DNA replication' Annotation: GO:0072429 'response to intra-S DNA damage checkpoint signaling' Annotation: GO:0004674 'protein serine/threonine kinase activity' Annotation: GO:0005634 'nucleus' Annotation: GO:1902845 'negative regulation of mitotic spindle elongation' Annotation: GO:0001100 'negative regulation of exit from mitosis' Annotation: GO:0005524 
'ATP binding' Annotation: GO:1903380 'positive regulation of mitotic chromosome condensation' Annotation: GO:0044732 'mitotic spindle pole body' Annotation: GO:0045842 'positive regulation of mitotic metaphase/anaphase transition' Annotation: GO:0000307 'cyclin-dependent protein kinase holoenzyme complex' Annotation: GO:0000790 'nuclear chromatin' Annotation: GO:1902424 'negative regulation of attachment of mitotic spindle microtubules to kinetochore' Annotation: GO:2001033 'negative regulation of double-strand break repair via nonhomologous end joining' Annotation: GO:0098783 'correction of merotelic kinetochore attachment, mitotic' Annotation: GO:0051445 'regulation of meiotic cell cycle' Annotation: GO:0035974 'meiotic spindle pole body' Annotation: GO:0008361 'regulation of cell size'
## TODO: Annotation extensions
## Get ancestors of all direct annotated terms, following is-a and part-of
## ('subClassOf' is the is-a relation and 'BFO:0000050' the part-of relation id)
inferred_anns = ont.traverse_nodes(direct_anns, relations=['subClassOf', 'BFO:0000050'])
## Render using graphviz, highlighting directly annotated terms
import os
from ontobio.io.ontol_renderers import GraphRenderer
## Make sure the target directory exists before the renderer is asked to write
## the PNG into it; on a fresh checkout 'output/' may be missing.
os.makedirs('output', exist_ok=True)
w = GraphRenderer.create('png')
w.outfile = 'output/cdc2.png'
## The subgraph is the inferred (ancestor) node set; the direct annotations are
## passed as query_ids so they are the highlighted nodes.
w.write_subgraph(ont, inferred_anns, query_ids=direct_anns)
## Get 3 GO ontology roots
## Each search result is unpacked into a single name, so a search that returns
## zero or several hits fails immediately with a ValueError instead of silently
## picking one.
(mf,) = ont.search('molecular_function')
(bp,) = ont.search('biological_process')
(cc,) = ont.search('cellular_component')
roots = [mf, bp, cc]
## Bare expression: displays the list when run as a notebook cell
roots
['GO:0003674', 'GO:0008150', 'GO:0005575']
## Create a mapping of all nodes to the ontology root
## `relations` is given as a list, like the other relation-filtered calls here.
## A bare string is itself a container of characters/substrings, so a membership
## test against it is not the same as a test against a one-element list
## (NOTE(review): confirm against ontobio's relation filtering).
rootmap = ont.create_slim_mapping(subset_nodes=roots, relations=['subClassOf'])
## Define a simple formatter that breaks annotations into groups/slims
def print_by_group(group_nodes, anns, nodemap, ontology=None):
    """Print annotations bucketed under each group (root or slim) term.

    For every node in ``group_nodes``, in the order given, the annotations
    whose ``nodemap`` entry contains that node are collected. A group with
    no matching annotation prints nothing at all. An annotation whose entry
    contains several groups is printed once under each of them.

    Args:
        group_nodes: iterable of group term ids (e.g. ontology roots or
            slim terms).
        anns: annotated term ids. It is iterated once per group, so it must
            be re-iterable (a list or set, not a one-shot generator).
        nodemap: lookup from an annotated term id to a container of the
            group ids it belongs to. Every id in ``anns`` is looked up with
            ``nodemap[id]``, so with a plain dict a missing id raises
            ``KeyError``.
        ontology: object providing ``label(term_id)``, used to fetch the
            text printed next to each id. When omitted, the module-level
            ``ont`` is used, which keeps existing three-argument calls
            working unchanged.
    """
    if ontology is None:
        ontology = ont
    for r in group_nodes:
        filtered_anns = [x for x in anns if r in nodemap[x]]
        # Skip empty groups so the report only shows headers that have content
        if filtered_anns:
            print('{} {}'.format(r, ontology.label(r)))
            for a in filtered_anns:
                print(' {} {}'.format(a, ontology.label(a)))
## Print all annotations to CDC2, grouped by ontology
## (the groups are the three roots; `rootmap` decides which root each annotation is listed under)
print_by_group(roots, direct_anns, rootmap)
GO:0003674 molecular_function GO:0097472 cyclin-dependent protein kinase activity GO:0005515 protein binding GO:0005524 ATP binding GO:0004674 protein serine/threonine kinase activity GO:0004693 cyclin-dependent protein serine/threonine kinase activity GO:0004672 protein kinase activity GO:0008150 biological_process GO:1905168 positive regulation of double-strand break repair via homologous recombination GO:1903465 positive regulation of mitotic cell cycle DNA replication GO:0010971 positive regulation of G2/M transition of mitotic cell cycle GO:1903467 negative regulation of mitotic DNA replication initiation GO:0045842 positive regulation of mitotic metaphase/anaphase transition GO:0031031 positive regulation of septation initiation signaling GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore GO:1900087 positive regulation of G1/S transition of mitotic cell cycle GO:1990820 response to mitotic DNA integrity checkpoint signaling GO:0007089 traversing start control point of mitotic cell cycle GO:0072435 response to mitotic G2 DNA damage checkpoint signaling GO:0051445 regulation of meiotic cell cycle GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining GO:0008361 regulation of cell size GO:1902845 negative regulation of mitotic spindle elongation GO:0010389 regulation of G2/M transition of mitotic cell cycle GO:0001100 negative regulation of exit from mitosis GO:1903380 positive regulation of mitotic chromosome condensation GO:0072429 response to intra-S DNA damage checkpoint signaling GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint GO:0098783 correction of merotelic kinetochore attachment, mitotic GO:0031138 negative regulation of conjugation with cellular fusion GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery GO:1905785 negative regulation of anaphase-promoting complex-dependent catabolic process GO:0005575 
cellular_component GO:0000775 chromosome, centromeric region GO:0044732 mitotic spindle pole body GO:0035974 meiotic spindle pole body GO:0005829 cytosol GO:0005737 cytoplasm GO:0005634 nucleus GO:0000790 nuclear chromatin GO:0000307 cyclin-dependent protein kinase holoenzyme complex GO:0072686 mitotic spindle GO:1990023 mitotic spindle midzone
Note that the report above contains some redundant annotations: for example, both nucleus and nuclear chromatin are listed.
Next we'll filter these out. We use a simple model of redundancy that considers only a minimal set of relations (is-a and part-of) and ignores evidence.
## Create a go-basic ontology, restricted to is-a and part-of
basic_ont = ont.subontology(relations=['subClassOf', 'BFO:0000050'])
## Filter the direct annotations for redundancy against the restricted ontology
## (presumably this drops terms that are ancestors of another annotated term --
## confirm against ontobio's filter_redundant), then report the before/after counts
nr_anns = basic_ont.filter_redundant(direct_anns)
print("Reduced {} annotations -> {} nonredundant annotations".format(len(direct_anns), len(nr_anns)))
Reduced 40 annotations -> 32 nonredundant annotations
## Report again, this time non-redundant
## Same roots and root mapping as the first report; only the annotation list differs.
print_by_group(roots, nr_anns, rootmap)
GO:0003674 molecular_function GO:0005515 protein binding GO:0005524 ATP binding GO:0004693 cyclin-dependent protein serine/threonine kinase activity GO:0008150 biological_process GO:1905168 positive regulation of double-strand break repair via homologous recombination GO:1903465 positive regulation of mitotic cell cycle DNA replication GO:1903467 negative regulation of mitotic DNA replication initiation GO:0010971 positive regulation of G2/M transition of mitotic cell cycle GO:0045842 positive regulation of mitotic metaphase/anaphase transition GO:0031031 positive regulation of septation initiation signaling GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore GO:1990820 response to mitotic DNA integrity checkpoint signaling GO:0007089 traversing start control point of mitotic cell cycle GO:0072435 response to mitotic G2 DNA damage checkpoint signaling GO:0051445 regulation of meiotic cell cycle GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining GO:0008361 regulation of cell size GO:1902845 negative regulation of mitotic spindle elongation GO:0001100 negative regulation of exit from mitosis GO:1903380 positive regulation of mitotic chromosome condensation GO:0072429 response to intra-S DNA damage checkpoint signaling GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint GO:0098783 correction of merotelic kinetochore attachment, mitotic GO:0031138 negative regulation of conjugation with cellular fusion GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery GO:1905785 negative regulation of anaphase-promoting complex-dependent catabolic process GO:0005575 cellular_component GO:0000775 chromosome, centromeric region GO:0044732 mitotic spindle pole body GO:0035974 meiotic spindle pole body GO:0005829 cytosol GO:0000790 nuclear chromatin GO:0000307 cyclin-dependent protein kinase holoenzyme complex GO:1990023 mitotic spindle midzone
## Build the groups for a slim-based report from the 'goslim_pombe' subset:
## the slim terms themselves, plus a mapping over is-a and part-of that
## print_by_group consults to decide which slim terms each annotation is listed under
slim_nodes = ont.extract_subset('goslim_pombe')
slim_map = ont.create_slim_mapping(subset='goslim_pombe', relations=['subClassOf', 'BFO:0000050'])
## Report, with breakdown by slim
print_by_group(slim_nodes, nr_anns, slim_map)
GO:1901990 regulation of mitotic cell cycle phase transition GO:0010971 positive regulation of G2/M transition of mitotic cell cycle GO:0045842 positive regulation of mitotic metaphase/anaphase transition GO:0031031 positive regulation of septation initiation signaling GO:0007089 traversing start control point of mitotic cell cycle GO:0001100 negative regulation of exit from mitosis GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint GO:0000070 mitotic sister chromatid segregation GO:0098783 correction of merotelic kinetochore attachment, mitotic GO:0008150 biological_process GO:1905168 positive regulation of double-strand break repair via homologous recombination GO:1903465 positive regulation of mitotic cell cycle DNA replication GO:1903467 negative regulation of mitotic DNA replication initiation GO:0010971 positive regulation of G2/M transition of mitotic cell cycle GO:0045842 positive regulation of mitotic metaphase/anaphase transition GO:0031031 positive regulation of septation initiation signaling GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore GO:1990820 response to mitotic DNA integrity checkpoint signaling GO:0007089 traversing start control point of mitotic cell cycle GO:0072435 response to mitotic G2 DNA damage checkpoint signaling GO:0051445 regulation of meiotic cell cycle GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining GO:0008361 regulation of cell size GO:1902845 negative regulation of mitotic spindle elongation GO:0001100 negative regulation of exit from mitosis GO:0004693 cyclin-dependent protein serine/threonine kinase activity GO:1903380 positive regulation of mitotic chromosome condensation GO:0072429 response to intra-S DNA damage checkpoint signaling GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint GO:0098783 correction of merotelic kinetochore attachment, mitotic GO:0031138 negative regulation of 
conjugation with cellular fusion GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery GO:1905785 negative regulation of anaphase-promoting complex-dependent catabolic process GO:0023052 signaling GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint