#!/usr/bin/env python
# coding: utf-8

# # Resolving
# 
# A [Resolver](https://nexus-forge.readthedocs.io/en/latest/interaction.html#resolving) is used to link terms or a `Resource` to identifiers (URIs) in a knowledge graph, thus addressing lexical variations
# (merging of synonyms, aliases and acronyms) and disambiguating them. This feature is also referred to as entity linking,
# especially in the context of Natural Language Processing (NLP) when building a knowledge graph from entities extracted from
# text documents.

# In[1]:


from kgforge.core import KnowledgeGraphForge


# A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb).

# In[2]:


# Create the session object used by every cell below, from the YAML configuration at this relative path.
# NOTE(review): the path is presumably resolved against the current working directory — confirm when running outside the notebook folder.
forge = KnowledgeGraphForge("../../configurations/forge.yml", debug=True)


# ## Imports

# In[3]:


from kgforge.core.commons.strategies import ResolvingStrategy
from kgforge.core.resource import Resource


# ## Discover configured resolvers
# With the `forge.resolvers()` method, configured resolvers can be inspected.

# In[4]:


forge.resolvers() # The values are taken from "../../configurations/forge.yml"


# A scope is a convenient (and arbitrary) way to name a given Resolver along with a set of sources of data (the `targets`) to resolve against. Resolve a resource for `female` in the 'terms' resolving scope.

# ### Get resolvers as dictionary
# 
# Passing `output="dict"` as parameter in `forge.resolvers()` returns the resolvers as a dictionary of scopes and their
# respective targets.

# In[5]:


# Keep the dictionary in `resolvers`. The bare name on the following line is a notebook display expression:
# its value is shown as cell output in Jupyter and is simply discarded when this file runs as a script.
resolvers = forge.resolvers(output="dict")
resolvers


# ## DemoResolver

# The DemoResolver resolves a term using string comparison, looking it up in a JSON file.

# ### scope

# Resolve the text `female` against the 'terms' resolving scope.

# In[6]:


# Resolve the text "female" within the 'terms' scope and keep the result for the two cells below.
female = forge.resolve(text="female", scope="terms")


# In[7]:


# Bare expression: displays the type of the resolved result when run as a notebook cell (no effect as a script).
type(female)


# In[8]:


# Print the resolved result.
print(female)


# ### use exact match

# In[9]:


# EXACT_MATCH is expected to find nothing for the mixed-case text "feMAle".
# Compare to None by identity (`is None`) so the check does not depend on the result's `__eq__`.
assert forge.resolve(text="feMAle", scope="terms", strategy=ResolvingStrategy.EXACT_MATCH) is None


# ### now exact but case-insensitive

# In[10]:


# Same mixed-case text, this time with the case-insensitive variant of the exact strategy.
print(forge.resolve(text="feMAle", scope="terms", strategy=ResolvingStrategy.EXACT_CASE_INSENSITIVE_MATCH))


# ### check that the match still has to be exact

# In[11]:


# "emale" is missing its first letter: ignoring case must not turn the exact strategy into a partial match.
assert forge.resolve(text="emale", scope="terms", strategy=ResolvingStrategy.EXACT_CASE_INSENSITIVE_MATCH) is None


# ### resolve with best match

# In[12]:


# The same truncated text, resolved with BEST_MATCH instead of an exact strategy.
print(forge.resolve(text="emale", scope="terms", strategy=ResolvingStrategy.BEST_MATCH))


# Resolve the text `EPFL` against the 'entities' resolving scope.

# In[13]:


# Resolve "EPFL" (text passed positionally here) within the 'entities' scope and keep the result for the cells below.
epfl = forge.resolve("EPFL", scope="entities")


# In[14]:


# Bare expression: displays the type of the resolved result when run as a notebook cell (no effect as a script).
type(epfl)


# In[15]:


# Print the resolved result.
print(epfl)


# ### target

# In[16]:


# Name one `target` of the 'terms' scope ("sexontology") in addition to the scope itself.
print(forge.resolve("female", scope="terms", target="sexontology"))


# In[17]:


# Same idea for the 'entities' scope, with the "agents" target.
print(forge.resolve("EPFL", scope="entities", target="agents"))


# ### type

# In[18]:


# Pass a `type` argument ("Class") along with the text.
# NOTE(review): presumably restricts candidates to that type — confirm against the resolver documentation.
print(forge.resolve("female", scope="terms", type="Class"))


# In[19]:


# Same call shape for the 'entities' scope, with type "Organization".
print(forge.resolve("EPFL", scope="entities", type="Organization"))


# ## Strategies

# Different strategies can be used to rank resolving candidates. 
# 
# In the following example, the missing 'e' at the end is intended for the demonstration.

# In[20]:


# Deliberately truncated input ("male" without the final 'e'), reused by the strategy cells below.
text = "mal"


# ### best match

# The default applied strategy is `strategy=ResolvingStrategy.BEST_MATCH`.

# In[21]:


# No `strategy` argument: the default strategy applies.
print(forge.resolve(text, scope="terms"))


# ### exact match

# In[22]:


# Same text with EXACT_MATCH requested explicitly.
print(forge.resolve(text, scope="terms", strategy=ResolvingStrategy.EXACT_MATCH))


# ### fuzzy match (all matches)

# The candidates list is ordered by score.

# In[23]:


# Keep the ALL_MATCHES result in `results` for the inspection cells below.
# NOTE(review): `limit=3` presumably caps the number of returned candidates — confirm.
results = forge.resolve(text, scope="terms", strategy=ResolvingStrategy.ALL_MATCHES, limit=3)


# In[24]:


# The next three bare expressions are notebook display cells (container type, size, element type);
# they have no visible effect when this file runs as a script.
type(results)


# In[25]:


len(results)


# In[26]:


type(results[0])


# In[27]:


# Print one candidate per line.
print(*results, sep="\n")


# ## Use case with cell types

# In[28]:


# Three sample cell-type labels resolved against the 'ontology' scope in the cells below.
pyramidal = 'Pyramidal Neuron'
cell_characters = "Lamp+"
hard_name = "270_L5/6 NP CT CTX"


# ### Exact match

# In[29]:


# In this section the strategy is given by name as a string instead of a `ResolvingStrategy` member.
print(forge.resolve(pyramidal, scope="ontology", strategy="EXACT_MATCH"))


# In[30]:


print(forge.resolve(cell_characters, scope="ontology", strategy="EXACT_MATCH"))


# In[31]:


print(forge.resolve(hard_name, scope="ontology", strategy="EXACT_MATCH"))


# When part of the name is lower-cased ("np" instead of "NP"), the exact match will return None.

# In[32]:


print(forge.resolve("270_L5/6 np CT CTX", scope="ontology", strategy="EXACT_MATCH"))


# ### Exact case-insensitive match

# In[33]:


# Lower-cased variant of `cell_characters` ("Lamp+").
print(forge.resolve("lamp+", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"))


# In[34]:


# NOTE(review): this cell is identical to In[33]; a different input may have been intended — confirm.
print(forge.resolve("lamp+", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"))


# In this case, using the case-insensitive match will find the cell type despite the changed letter case.

# In[35]:


print(forge.resolve("270_L5/6 np CT CTx", scope="ontology", strategy="EXACT_CASE_INSENSITIVE_MATCH"))


# ### Best match (default)

# In[36]:


# No `strategy` argument, and a one-character query.
print(forge.resolve("2", scope="ontology"))


# ### All matches

# In[37]:


# Rebinds `results` (also used in the Strategies section), then prints one candidate per line.
results = forge.resolve("2", scope="ontology", strategy="ALL_MATCHES")
print(*results, sep="\n")


# ## Resolving a Resource
# A `kgforge.core.resource.Resource` can be resolved. In such a case, and in addition to the other supported arguments, the resource property to resolve can be provided through the argument 'property_to_resolve'. The resolving result can be merged back into the input resource by setting the 'merge_inplace_as' argument. When 'merge_inplace_as' is not set, the results are returned as separate resources.

# In[38]:


# Build a Resource whose `gender` value is the truncated text "mal"; it is the input of both resolve calls below.
resource = Resource(type="Agent", gender="mal")
print(resource)


# In[39]:


# First variant: resolve the `gender` property and pass `merge_inplace_as="gender_resolved"`.
# NOTE(review): `threshold=0.8` is presumably a minimum candidate score — confirm against the resolver documentation.
resource_resolved_merged = forge.resolve(resource, scope="terms", target="sexontology",
                                  strategy=ResolvingStrategy.ALL_MATCHES,
                                  property_to_resolve="gender",
                                  merge_inplace_as="gender_resolved",
                                  threshold=0.8)


# In[40]:


# Bare expression: displays the result type when run as a notebook cell (no effect as a script).
type(resource_resolved_merged)


# In[41]:


print(resource_resolved_merged)


# In[42]:


# Second variant: the same call without `merge_inplace_as`.
resource_resolved_separated = forge.resolve(resource, scope="terms", target="sexontology",
                                  strategy=ResolvingStrategy.ALL_MATCHES,
                                  property_to_resolve="gender",
                                  threshold=0.8)


# In[43]:


# The next two bare expressions are notebook display cells (result type and number of results).
type(resource_resolved_separated)


# In[44]:


len(resource_resolved_separated)


# In[45]:


# Print one result per line.
print(*resource_resolved_separated, sep="\n")


# ## EntityLinkerSkLearn Resolver

# Based on a pretrained model and using [scikit-learn](https://scikit-learn.org/stable/index.html) to generate and rank candidates.

# In[46]:


# Resolve "person" in the 'schemaorg' scope against its 'terms' target, with the best-match strategy.
# NOTE(review): presumably 'schemaorg' is the scope configured with the EntityLinkerSkLearn resolver — confirm in forge.yml.
print(forge.resolve("person", scope="schemaorg", target="terms", strategy=ResolvingStrategy.BEST_MATCH))