Biolink Metamodel Test Notebook

In [20]:
!pip install yamlmagic
%reload_ext yamlmagic
Requirement already satisfied: yamlmagic in /Users/solbrig/.local/share/virtualenvs/jupyter-AOORh7Nt/lib/python3.7/site-packages (0.2.0)
In [21]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated in newer IPython releases.
from IPython.display import display, HTML
from types import ModuleType
from json import loads, dumps
from jsonasobj import JsonObj, as_json_object
from rdflib import Graph

from biolinkml.meta import SchemaDefinition
from biolinkml.utils.schemaloader import SchemaLoader
from biolinkml.utils.yamlutils import DupCheckYamlLoader, as_json_object as yaml_to_json

from biolinkml.generators.shexgen import ShExGenerator
from biolinkml.generators.pythongen import PythonGenerator
from biolinkml.generators.yumlgen import YumlGenerator
from biolinkml.generators.jsonldcontextgen import ContextGenerator

Basic model structure

A biolink model consists of:

  • a name
  • a uri
  • type definitions
  • slot definitions
  • class definitions
  • subset definitions

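As an example, the model below defines two classes — `person` and its subclass `friendly_person`, which must know at least one person — together with the types and slots they use: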

In [22]:
%%yaml --loader DupCheckYamlLoader yaml
# Example schema. The parsed document is bound to the variable `yaml`, which the
# later cells pass to the Python, yUML, JSON-LD context and ShEx generators.
id: http://example.org/sample/example1
name: synopsis2
# CURIE prefixes referenced by the `uri` / `slot_uri` values below.
prefixes:
    foaf: http://xmlns.com/foaf/0.1/
    samp: http://example.org/model/
    xsd: http://www.w3.org/2001/XMLSchema#
    
# `samp` expands to http://example.org/model/ (see `prefixes` above).
default_prefix: samp

default_curi_maps:
    - semweb_context
    
# NOTE(review): presumably the range used for slots that declare none
# (e.g. `first name`, `last name`) -- confirm against the metamodel docs.
default_range: string

# Each type names a Python base class (`base`) and an XSD datatype (`uri`).
types:
    string:
        base: str
        uri: xsd:string
    int:
        base: int
        uri: xsd:integer
    boolean:
        base: Bool
        uri: xsd:boolean
        

classes:
    person:
        description: A person, living or dead
        # Slots are defined in the top-level `slots` section.
        slots:
            - id
            - first name
            - last name
            - age
            - living
            - knows
                  
    friendly_person:
        description: Any person that knows someone
        is_a: person
        # Tightens the inherited `knows` slot: it becomes required for this class.
        slot_usage:
            knows:
                required: True

slots:
    id:
        description: Unique identifier of a person
        identifier: true

    first name:
        description: The first name of a person
        slot_uri: foaf:firstName
        multivalued: true
        
    last name:
        description: The last name of a person
        slot_uri: foaf:lastName
        required: true
        
    living:
        description: Whether the person is alive
        range: boolean
        comments:
            - unspecified means unknown
        
    age:
        description: The age of a person if living or age of death if not
        range: int
        slot_uri: foaf:age
        
    # Self-referential: the range is the `person` class itself.
    knows:
        description: A person known by this person (indicating some level of reciprocated interaction between the parties).
        range: person
        slot_uri: foaf:knows
        multivalued: true

We can emit this model as Python source code (a set of dataclasses):

In [23]:
# Serialize the model to Python source text, then show it.
python_source = PythonGenerator(yaml).serialize()
print(python_source)
# Auto generated from None by pythongen.py version: 0.2.0
# Generation date: 2019-04-09 15:50
# Schema: synopsis2
#
# id: http://example.org/sample/example1
# description:
# license:

from typing import Optional, List, Union, Dict, ClassVar
from dataclasses import dataclass
from biolinkml.utils.metamodelcore import empty_list, empty_dict
from biolinkml.utils.yamlutils import YAMLRoot
from biolinkml.utils.metamodelcore import Bool

metamodel_version = "1.3.2"

# Types
class String(str):
    pass


class Int(int):
    pass


class Boolean(Bool):
    pass


# Class references
class PersonId(str):
    pass


class FriendlyPersonId(PersonId):
    pass


@dataclass
class Person(YAMLRoot):
    """
    A person, living or dead
    """
    _inherited_slots: ClassVar[List[str]] = []

    # === person ===
    id: Union[str, PersonId]
    last_name: str
    first_name: List[str] = empty_list()
    age: Optional[int] = None
    living: Optional[Bool] = None
    knows: List[Union[str, PersonId]] = empty_list()

    def _fix_elements(self):
        super()._fix_elements()
        if not isinstance(self.id, PersonId):
            self.id = PersonId(self.id)
        self.knows = [v if isinstance(v, PersonId)
                      else PersonId(v) for v in self.knows]


@dataclass
class FriendlyPerson(Person):
    """
    Any person that knows someone
    """
    _inherited_slots: ClassVar[List[str]] = []

    # === person ===
    id: Union[str, FriendlyPersonId] = None
    last_name: str = None
    first_name: List[str] = empty_list()
    age: Optional[int] = None
    living: Optional[Bool] = None

    # === friendly_person ===
    knows: List[Union[str, PersonId]] = empty_list()

    def _fix_elements(self):
        super()._fix_elements()
        if self.id is not None and not isinstance(self.id, FriendlyPersonId):
            self.id = FriendlyPersonId(self.id)
        self.knows = [v if isinstance(v, PersonId)
                      else PersonId(v) for v in self.knows]

Compile the generated Python source into an in-memory module

In [24]:
# Create an empty module object named 'test' and execute the generated
# source inside its namespace, so the generated classes become attributes
# of `module` (e.g. module.Person).
module = ModuleType('test')
generated_code = compile(PythonGenerator(yaml).serialize(), 'test', 'exec')
exec(generated_code, vars(module))

We can emit a UML rendering of this model

In [25]:
# The serialized yUML output is used as the `src` of an <img> tag.
yuml_src = YumlGenerator(yaml).serialize()
display(HTML(f'<img src="{yuml_src}"/>'))

We can emit a JSON-LD context for the model:

In [26]:
# Generate the JSON-LD context as text, parse it into `cntxt` (reused when
# building RDF below), and pretty-print it.
context_text = ContextGenerator(yaml).serialize(base="http://example.org/people/")
cntxt = loads(context_text)
print(dumps(cntxt, indent="  "))
{
  "_comments": "Auto generated from None by jsonldcontextgen.py version: 0.0.2\nGeneration date: 2019-04-09 15:50\nSchema: synopsis2\n\nid: http://example.org/sample/example1\ndescription: \nlicense: \n",
  "@context": {
    "_comments": null,
    "type": "@type",
    "foaf": "http://xmlns.com/foaf/0.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "@vocab": "http://example.org/model/",
    "age": {
      "@type": "xsd:integer",
      "@id": "foaf:age"
    },
    "first_name": {
      "@id": "foaf:firstName"
    },
    "id": "@id",
    "knows": {
      "@type": "@id",
      "@id": "foaf:knows"
    },
    "last_name": {
      "@id": "foaf:lastName"
    },
    "living": {
      "@type": "xsd:boolean"
    },
    "@base": "http://example.org/people/"
  }
}

The generated Python classes can be used to create instances

In [27]:
# Build a Person from the generated module; `living` and `knows` are left
# at their defaults.
joe_smith = module.Person(
    id="42",
    last_name="smith",
    first_name=['Joe', 'Bob'],
    age=43,
)
print(joe_smith)
Person(id='42', last_name='smith', first_name=['Joe', 'Bob'], age=43, living=None, knows=[])

These instances can be combined with the JSON-LD context to generate RDF

In [28]:
# Add the context and turn it into RDF
jsonld = as_json_object(yaml_to_json(joe_smith, cntxt))
print(jsonld)
g = Graph()
g.parse(data=jsonld, format="json-ld")
# Graph.serialize() returns bytes on older rdflib releases and str on
# rdflib >= 6 (when no destination is given); decode only when needed so the
# cell runs on both.
turtle = g.serialize(format="turtle")
print(turtle.decode() if isinstance(turtle, bytes) else turtle)
{
   "id": "42",
   "last_name": "smith",
   "first_name": [
      "Joe",
      "Bob"
   ],
   "age": 43,
   "living": null,
   "knows": [],
   "type": "Person",
   "_comments": "Auto generated from None by jsonldcontextgen.py version: 0.0.2\nGeneration date: 2019-04-09 15:50\nSchema: synopsis2\n\nid: http://example.org/sample/example1\ndescription: \nlicense: \n",
   "@context": {
      "_comments": null,
      "type": "@type",
      "foaf": "http://xmlns.com/foaf/0.1/",
      "xsd": "http://www.w3.org/2001/XMLSchema#",
      "@vocab": "http://example.org/model/",
      "age": {
         "@type": "xsd:integer",
         "@id": "foaf:age"
      },
      "first_name": {
         "@id": "foaf:firstName"
      },
      "id": "@id",
      "knows": {
         "@type": "@id",
         "@id": "foaf:knows"
      },
      "last_name": {
         "@id": "foaf:lastName"
      },
      "living": {
         "@type": "xsd:boolean"
      },
      "@base": "http://example.org/people/"
   }
}
@prefix : <http://example.org/model/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/people/42> a :Person ;
    foaf:age 43 ;
    foaf:firstName "Bob",
        "Joe" ;
    foaf:lastName "smith" .


The model can be turned into ShEx

In [29]:
# Render the model as a ShEx schema; `shex` is reused by the validation cells.
shex_generator = ShExGenerator(yaml)
shex = shex_generator.serialize(collections=False)
print(shex)
BASE <http://example.org/model/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>


<String> xsd:string

<Int> xsd:integer

<Boolean> xsd:boolean

<FriendlyPerson> EXTRA rdf:type CLOSED {
    (  $<FriendlyPerson_t> (  <knows> @<Person> + ;
          &<Person_t>
       ) ;
       rdf:type [ <FriendlyPerson> ]
    )
}

<Person> EXTRA rdf:type CLOSED {
    (  $<Person_t> (  foaf:firstName @<String> * ;
          foaf:lastName @<String> ;
          foaf:age @<Int> ? ;
          <living> @<Boolean> ? ;
          foaf:knows @<Person> *
       ) ;
       rdf:type [ <Person> ]
    )
}



The ShEx can then be used to validate RDF

In [30]:
from pyshex.evaluate import evaluate

# Validate node people/42 in graph `g` against the Person shape.
# The result is indexable: item 0 is truthy on success, item 1 is what gets
# printed when validation fails.
r = evaluate(
    g,
    shex,
    focus="http://example.org/people/42",
    start="http://example.org/model/Person",
)
if r[0]:
    print("Conforms")
else:
    print(r[1])
Conforms
In [31]:
# Validate the same node against the FriendlyPerson shape. This is expected
# to fail: the node is typed :Person and has no `knows` values, so the
# failure report (item 1 of the result) is printed instead of "Conforms".
r = evaluate(
    g,
    shex,
    focus="http://example.org/people/42",
    start="http://example.org/model/FriendlyPerson",
)
if r[0]:
    print("Conforms")
else:
    print(r[1])
  Testing <http://example.org/people/42> against shape http://example.org/model/FriendlyPerson
    Node: :Person not in value set:
	 {"values": ["http://example.org/model/FriendlyPerson"], "typ...
  Testing <http://example.org/people/42> against shape http://example.org/model/FriendlyPerson
    Node: :Person not in value set:
	 {"values": ["http://example.org/model/FriendlyPerson"], "typ...
  Testing <http://example.org/people/42> against shape http://example.org/model/FriendlyPerson
       No matching triples found for predicate rdf:type
In [ ]: