#!/usr/bin/env python
# coding: utf-8

# # Resolving
#
# A [Resolver](https://nexus-forge.readthedocs.io/en/latest/interaction.html#resolving) is used to link terms or a `Resource` to identifiers (URIs) in a knowledge graph thus addressing lexical variations
# (merging of synonyms, aliases and acronyms) and disambiguating them. This feature is also referred to as entity linking,
# especially in the context of Natural Language Processing (NLP) when building a knowledge graph from entities extracted from
# text documents.

# In[1]:


from kgforge.core import KnowledgeGraphForge


# A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb).

# In[2]:


forge = KnowledgeGraphForge("../../configurations/forge.yml", debug=True)


# ## Imports

# In[3]:


from kgforge.core.commons.strategies import ResolvingStrategy
from kgforge.core.resource import Resource


# ## Discover configured resolvers

# With the `forge.resolvers()` method, configured resolvers can be inspected.

# In[4]:


forge.resolvers()


# The values are taken from "../../configurations/forge.yml"

# A scope is a convenient (and arbitrary) way to name a given Resolver along with a set of sources of data (the `targets`) to resolve against. Resolve a resource for `female` in the 'terms' resolving scope.

# ### Get resolvers as dictionary
#
# Passing `output="dict"` as parameter in `forge.resolvers()` returns the resolvers as a dictionary of scopes and their
# respective targets.

# In[5]:


resolvers = forge.resolvers(output="dict")
resolvers


# ## DemoResolver

# The DemoResolver resolves a term using string comparison and looks it up in a JSON file.

# ### scope

# Resolve the text `female` against the 'terms' resolving scope.

# In[6]:


female = forge.resolve(text="female", scope="terms")


# In[7]:


type(female)


# In[8]:


print(female)


# ### use exact match

# In[9]:


# The mixed-case text is expected not to resolve with an exact match.
assert (
    forge.resolve(text="feMAle", scope="terms", strategy=ResolvingStrategy.EXACT_MATCH)
    is None
)


# ### now exact but case-insensitive

# In[10]:


print(
    forge.resolve(
        text="feMAle",
        scope="terms",
        strategy=ResolvingStrategy.EXACT_CASE_INSENSITIVE_MATCH,
    )
)


# ### check it should be exact

# In[11]:


# A truncated term is expected not to resolve: the match must still be exact.
assert (
    forge.resolve(
        text="emale",
        scope="terms",
        strategy=ResolvingStrategy.EXACT_CASE_INSENSITIVE_MATCH,
    )
    is None
)


# ### resolve with best match

# In[12]:


print(forge.resolve(text="emale", scope="terms", strategy=ResolvingStrategy.BEST_MATCH))


# Resolve the text `EPFL` against the 'entities' resolving scope.

# In[13]:


epfl = forge.resolve("EPFL", scope="entities")


# In[14]:


type(epfl)


# In[15]:


print(epfl)


# ### target

# In[16]:


print(forge.resolve("female", scope="terms", target="sexontology"))


# In[17]:


print(forge.resolve("EPFL", scope="entities", target="agents"))


# ### type

# In[18]:


print(forge.resolve("female", scope="terms", type="Class"))


# In[19]:


print(forge.resolve("EPFL", scope="entities", type="Organization"))


# ## Strategies

# Different strategies can be used to rank resolving candidates.
#
# In the following example, the missing 'e' at the end is intended for the demonstration.

# In[20]:


text = "mal"


# ### best match

# The default applied strategy is `strategy=ResolvingStrategy.BEST_MATCH`.

# In[21]:


print(forge.resolve(text, scope="terms"))


# ### exact match

# In[22]:


print(forge.resolve(text, scope="terms", strategy=ResolvingStrategy.EXACT_MATCH))


# ### fuzzy match (all matches)

# The candidates list is ordered by score.

# In[23]:


results = forge.resolve(
    text, scope="terms", strategy=ResolvingStrategy.ALL_MATCHES, limit=3
)


# In[24]:


type(results)


# In[25]:


len(results)


# In[26]:


type(results[0])


# In[27]:


print(*results, sep="\n")


# ## Use case with cell types

# In[28]:


pyramidal = 'Pyramidal Neuron'
cell_characters = "Lamp+"
hard_name = "270_L5/6 NP CT CTX"


# ### Exact match

# In[29]:


print(forge.resolve(pyramidal, scope="ontology", strategy="EXACT_MATCH"))


# In[30]:


print(forge.resolve(cell_characters, scope="ontology", strategy="EXACT_MATCH"))


# In[31]:


print(forge.resolve(hard_name, scope="ontology", strategy="EXACT_MATCH"))


# when using lower case, it will return None

# In[32]:


print(forge.resolve("270_L5/6 np CT CTX", scope="ontology", strategy="EXACT_MATCH"))


# ### Exact case-insensitive match

# In[33]:


print(forge.resolve("lamp+", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"))


# In[34]:


# NOTE(review): this call is identical to the previous cell -- confirm whether
# a different term was intended here.
print(forge.resolve("lamp+", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"))


# in this case using the case-insensitive match will find the cell type

# In[35]:


print(
    forge.resolve(
        "270_L5/6 np CT CTx", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"
    )
)


# ### Best match (default)

# In[36]:


print(forge.resolve("2", scope="ontology"))


# ### All matches

# In[37]:


results = forge.resolve("2", scope="ontology", strategy="ALL_MATCHES")
print(*results, sep="\n")


# ## Resolving a Resource

# A kgforge.core.resource.Resource can be resolved. In such case and in addition to the other supported arguments, the resource property to resolve can be provided through the argument 'property_to_resolve'. The resolving result can be merged back into the input resource by setting the 'merge_inplace_as' argument. When 'merge_inplace_as' is not set then the results are returned as separate resources.

# In[38]:


resource = Resource(type="Agent", gender="mal")
print(resource)


# In[39]:


resource_resolved_merged = forge.resolve(
    resource,
    scope="terms",
    target="sexontology",
    strategy=ResolvingStrategy.ALL_MATCHES,
    property_to_resolve="gender",
    merge_inplace_as="gender_resolved",
    threshold=0.8,
)


# In[40]:


type(resource_resolved_merged)


# In[41]:


print(resource_resolved_merged)


# In[42]:


resource_resolved_separated = forge.resolve(
    resource,
    scope="terms",
    target="sexontology",
    strategy=ResolvingStrategy.ALL_MATCHES,
    property_to_resolve="gender",
    threshold=0.8,
)


# In[43]:


type(resource_resolved_separated)


# In[44]:


len(resource_resolved_separated)


# In[45]:


print(*resource_resolved_separated, sep="\n")


# ## EntityLinkerSkLearn Resolver

# Based on a pretrained model and using [scikit-learn](https://scikit-learn.org/stable/index.html) to generate and rank candidates.

# In[46]:


print(
    forge.resolve(
        "person",
        scope="schemaorg",
        target="terms",
        strategy=ResolvingStrategy.BEST_MATCH,
    )
)