Tutorial: Integrate Neuroscience Datasets from Multiple Sources using MINDS¶

Initialize and configure¶

In [ ]:

!pip install nexusforge==0.7.0

In [ ]:

!pip install allensdk

In [ ]:

!pip install neurom[plotly]==3.0.1

In [ ]:

!pip install --upgrade nest-asyncio==1.5.1

Get an authentication token¶

The Nexus sandbox application can be used to get a token:

Step 1: From the web page, click on the login button in the top right corner and follow the instructions on screen.
Step 2: Once logged in, click on the button on the top right that displays your GitHub username. From the dropdown, select the Copy token option. This will copy the token to your clipboard.

Once a token is obtained, proceed to paste it as the value of the TOKEN variable below.

Important: A Nexus token is valid for 8 hours. If your working session is open for more than 8 hours, you may need to refresh the value of the token and reinitialize the forge client in the 'Configure a forge client to store, manage and access datasets' section below.

In [ ]:

import getpass

In [ ]:

# Prompt for the Nexus token interactively so it is never stored in the notebook source.
TOKEN = getpass.getpass()

Configure a forge client to store, manage and access datasets¶

In [ ]:

import uuid
import base64
import requests
import json
from pathlib import Path

from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.mappings import DictionaryMapping

from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import CellTypesCache

In [ ]:

# Fetch the pinned JSON-LD context and store a local copy at ./rdfmodel/jsonldcontext.json.
# NOTE(review): presumably the forge configuration loaded later reads this local file — confirm.
r = requests.get('https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/rdfmodel/jsonldcontext.json')
# Surface HTTP failures (404, rate limiting, ...) as a clear error instead of an
# opaque JSON decoding failure further down.
r.raise_for_status()
# Parse before opening the output file so a malformed body cannot leave an
# empty, truncated context file behind.
jsonld_context = r.json()
dirpath = './rdfmodel'
Path(dirpath).mkdir(parents=True, exist_ok=True)
with open(f'{dirpath}/jsonldcontext.json', 'w') as outfile:
    json.dump(jsonld_context, outfile)

In [ ]:

# Organization and project; they are joined as "ORG/PROJECT" to form the bucket given to the forge client.
ORG = "github-users"
PROJECT = ""  # Set this to the project name that was created automatically when you logged into the Nexus sandbox instance.

In [ ]:

# Create the forge session from the pinned remote configuration file, targeting
# the ORG/PROJECT bucket on the sandbox endpoint and authenticating with TOKEN.
forge_config_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/forge.yml"
forge = KnowledgeGraphForge(
    forge_config_url,
    bucket=f"{ORG}/{PROJECT}",
    endpoint="https://sandbox.bluebrainnexus.io/v1",
    token=TOKEN,
)

Download datasets from Allen Cell Types Database and MouseLight¶

Download mouse neuron morphologies from the Allen Cell Types Database¶

We will be downloading mouse neuron morphology data from the Allen Cell Types Database. The AllenSDK can be used for data download.

In [ ]:

# Local directory holding the AllenSDK manifest and the cells.json metadata file read later on.
ALLEN_DIR = "allen_cell_types_database"

In [ ]:

# AllenSDK cache handle for the Cell Types Database; its manifest file lives under ALLEN_DIR.
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")

In [ ]:

# Upper bound on the number of cells taken from each source (Allen and MouseLight).
MAX_CELLS = 10 # Increase to include more cells
# Species passed to the Allen cell query.
SPECIES = CellTypesApi.MOUSE

In [ ]:

# Identifiers of the first MAX_CELLS cells of the chosen species that have a
# morphology reconstruction available.
nm_allen_identifiers = [
    cell["id"]
    for cell in ctc.get_cells(species=[SPECIES], require_reconstruction=True)
][:MAX_CELLS]
# The selection is a list (up to MAX_CELLS entries), so report it in the plural
# together with its actual size.
print(f"Selected {len(nm_allen_identifiers)} mouse neuron(s) with identifiers: {nm_allen_identifiers}")

Select metadata

In [ ]:

# Load the full cell metadata listing from ALLEN_DIR/cells.json.
cells_path = f"{ALLEN_DIR}/cells.json"
with open(cells_path) as cells_file:
    allen_cell_types_metadata = json.loads(cells_file.read())

In [ ]:

# Keep only the metadata records of the selected cells. The identifiers are put
# in a set once so each membership test is O(1) instead of a scan of the list
# for every record in cells.json (matters when MAX_CELLS is increased).
selected_allen_ids = set(nm_allen_identifiers)
nm_allen_metadata = [
    neuron
    for neuron in allen_cell_types_metadata
    if neuron["specimen__id"] in selected_allen_ids
]

Download reconstruction files

In [ ]:

# Request the reconstruction of every selected cell through the cache handle.
# The return value is discarded; presumably the SDK stores the files on disk
# according to its manifest — verify against the AllenSDK documentation.
for identifier in nm_allen_identifiers:
    ctc.get_reconstruction(identifier)

Download mouse neuron electrophysiology recordings from the Allen Cell Types Database¶

Download Electrophysiology recordings

In [ ]:

# Request the electrophysiology data of every selected cell through the cache handle.
# The return value is discarded; presumably the SDK stores the files on disk
# according to its manifest — verify against the AllenSDK documentation.
for identifier in nm_allen_identifiers:
    ctc.get_ephys_data(identifier)

Download mouse neuron morphologies from MouseLight project¶

We will be downloading mouse neuron morphology data from the MouseLight project.

In [ ]:

# MouseLight neuron browser GraphQL endpoint, used to search for neurons.
URL_GRAPHQL = "http://ml-neuronbrowser.janelia.org/graphql/"

In [ ]:

# MouseLight export endpoint, used with "format": 1 to fetch per-neuron metadata.
URL_JSON = "http://ml-neuronbrowser.janelia.org/export"

In [ ]:

# MouseLight export endpoint, used with "format": 0 to fetch base64-encoded SWC files.
# Same URL as URL_JSON; only the "format" field of the request body differs.
URL_SWC = "http://ml-neuronbrowser.janelia.org/export"

Select metadata

In [ ]:

# Run the SearchNeurons GraphQL operation against the MouseLight endpoint.
# NOTE(review): the context values below (scope, nonce, composition, structure
# ids, ...) are opaque here; they look like they were captured from a neuron
# browser session — confirm their meaning before changing any of them.
# NOTE(review): the HTTP status is not checked; a failed request surfaces as a
# JSON/KeyError when the response is parsed below.
nm_request = requests.post(URL_GRAPHQL, json={"operationName":"SearchNeurons",
                                "variables":{
                                    "context":{
                                        "scope":6,
                                        "nonce":"ckzmxfe9600033e68qnwoa59t",
                                        "ccfVersion": "CCFV25",
                                        "predicates":[{
                                            "predicateType":"ANATOMICAL",
                                            "tracingIdsOrDOIs":["1"],
                                            "tracingIdsOrDOIsExactMatch":False,
                                            "tracingStructureIds":["68e76074-1777-42b6-bbf9-93a6a5f02fa4"],
                                            "nodeStructureIds":["c37953e1-a1e9-4b9a-847e-08d9566ced65"],
                                            "operatorId":None,
                                            "amount":0,
                                            "brainAreaIds":[],
                                            "arbCenter":{
                                                "x":None,
                                                "y":None,
                                                "z":None},
                                            "arbSize":None,
                                            "invert":False,
                                            "composition":3
                                        }]
                                    }
                                },
                                "query":"""query SearchNeurons($context: SearchContext) {\n  searchNeurons(context: $context) 
                                            {\n    totalCount\n    queryTime\n    nonce\n    \n    neurons {\n      id\n      
                                            idString\n      tracings {\n        id\n        tracingStructure {\n          id\n          
                                            name\n          value\n          __typename\n        }\n        soma {\n          id\n          
                                            x\n          y\n          z\n          radius\n          parentNumber\n          
                                            sampleNumber\n          brainAreaIdCcfV30\n          structureIdentifierId\n          
                                            __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    
                                            __typename\n  }\n}\n"""
                               })
# Pull the list of neuron records out of the GraphQL response body.
nm_mouselight_graphql = json.loads(nm_request.text)["data"]["searchNeurons"]["neurons"]
# Keep each neuron's idString; it is the identifier sent to the export endpoint later.
nm_mouselight_names = [x["idString"] for x in nm_mouselight_graphql]

In [ ]:

# Collect the exported metadata ("format": 1) of the first MAX_CELLS MouseLight
# neurons, skipping exports that contain no neuron.
nm_mouselight_metadata = []
export_headers = {"Accept": "*/*", "Content-Type": "application/json"}
for name in nm_mouselight_names[:MAX_CELLS]:
    response = requests.post(
        URL_JSON,
        json={"ids": [name], "format": 1},
        headers=export_headers,
    )
    contents = response.json()["contents"]
    if contents["neurons"]:
        nm_mouselight_metadata.append(contents)

In [ ]:

# Annotate the first neuron of every MouseLight export with the name of the
# Allen structure its soma belongs to, stored under the "allenLabel" key.
for nm in nm_mouselight_metadata:
    first_neuron = nm["neurons"][0]
    soma_allen_id = first_neuron["soma"]["allenId"]
    # Reset for every neuron: without this, a neuron whose soma id has no entry
    # in allenInformation would silently inherit the previous neuron's label
    # (or raise NameError if it is the first one).
    allenLabel = None
    for info in first_neuron["allenInformation"]:
        if info["allenId"] == soma_allen_id:
            allenLabel = info["name"]
    # Stays None when no allenInformation entry matches the soma id.
    first_neuron["allenLabel"] = allenLabel

Download reconstruction files

In [ ]:

# Export the reconstruction ("format": 0) of the first MAX_CELLS MouseLight
# neurons and write each one to ./mouselight/<name>.swc. The endpoint returns
# the file base64-encoded in the "contents" field; empty exports are skipped.
for name in nm_mouselight_names[:MAX_CELLS]:
    response = requests.post(
        URL_SWC,
        json={"ids": [name], "format": 0},
        headers={"Accept": "*/*", "Content-Type": "application/json"},
    )
    payload = response.json()
    encoded_swc = payload["contents"]
    if encoded_swc:
        swc_bytes = base64.b64decode(encoded_swc.encode('ascii'))
        dirpath = './mouselight'
        Path(dirpath).mkdir(parents=True, exist_ok=True)
        with open(f"{dirpath}/{name}.swc", "wb") as swc_file:
            swc_file.write(swc_bytes)
    else:
        print(f"Skipping empty dataset: {payload}")

Mapping¶

Map the Allen Cell Types Database neuron morphologies to Neuroshapes¶

In [ ]:

# Load the pinned mapping for Allen morphology metadata and apply it to the
# selected metadata records.
# TODO: the original line carried a bare TODO with no detail — intent unknown.
allen_nm_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_morphology_dataset.hjson"
allen_nm_mapping = DictionaryMapping.load(allen_nm_mapping_url)
nm_allen_resources = forge.map(nm_allen_metadata, allen_nm_mapping)

Map the Allen Cell Types Database neuron electrophysiology recordings to Neuroshapes¶

In [ ]:

# Load the pinned mapping for Allen electrophysiology datasets and apply it to
# the same selected metadata records used for the morphologies.
# TODO: the original line carried a bare TODO with no detail — intent unknown.
allen_ephys_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_ephys_dataset.hjson"
allen_ephys_mapping = DictionaryMapping.load(allen_ephys_mapping_url)
nephys_allen_resources = forge.map(nm_allen_metadata, allen_ephys_mapping)

Map the MouseLight neuron morphologies to Neuroshapes¶

In [ ]:

# Load the pinned mapping for MouseLight datasets and apply it to the exported
# MouseLight metadata.
# TODO: the original line carried a bare TODO with no detail — intent unknown.
mouselight_nm_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/mouselight_dataset.hjson"
mouselight_nm_mapping = DictionaryMapping.load(mouselight_nm_mapping_url)
nm_mouselight_resources = forge.map(nm_mouselight_metadata, mouselight_nm_mapping)

Register¶

If the registration fails, try refreshing the access token and reinitializing the forge client in the 'Configure a forge client to store, manage and access datasets' section.

Register the Allen Cell Types Database neuron morphologies¶

In [ ]:

# Give every Allen morphology resource a fresh random identifier, formatted by
# the forge "identifier" formatter for the "neuronmorphologies" collection.
for morphology in nm_allen_resources:
    random_suffix = str(uuid.uuid4())
    morphology.id = forge.format("identifier", "neuronmorphologies", random_suffix)

In [ ]:

# Register the Allen morphology resources through the forge client.
forge.register(nm_allen_resources)

Register the Allen Cell Types Database neuron electrophysiology recordings¶

In [ ]:

# Give every Allen electrophysiology resource a fresh random identifier,
# formatted by the forge "identifier" formatter for the "traces" collection.
for trace in nephys_allen_resources:
    random_suffix = str(uuid.uuid4())
    trace.id = forge.format("identifier", "traces", random_suffix)

In [ ]:

# Register the Allen electrophysiology resources through the forge client.
forge.register(nephys_allen_resources)

Register the MouseLight neuron morphologies¶

In [ ]:

# Give every MouseLight morphology resource a fresh random identifier, formatted
# by the forge "identifier" formatter for the "neuronmorphologies" collection.
for morphology in nm_mouselight_resources:
    random_suffix = str(uuid.uuid4())
    morphology.id = forge.format("identifier", "neuronmorphologies", random_suffix)

In [ ]:

# Register the MouseLight morphology resources through the forge client.
forge.register(nm_mouselight_resources)

Save the created resources in JSON files¶

In [ ]:

# Create ./database (if needed) and save the MouseLight resources there as
# expanded JSON-LD.
dirpath = './database'
Path(dirpath).mkdir(parents=True, exist_ok=True)
with open(f"{dirpath}/mouselight-protocols.json", "w") as json_file:
    json_file.write(json.dumps(forge.as_jsonld(nm_mouselight_resources, form="expanded")))

In [ ]:

# Save the Allen morphology resources as expanded JSON-LD next to the other
# exports; relies on `dirpath` still pointing at the database directory.
with open(f"{dirpath}/allen-morphologies-protocols.json", "w") as json_file:
    json_file.write(json.dumps(forge.as_jsonld(nm_allen_resources, form="expanded")))

In [ ]:

# Save the Allen electrophysiology resources as expanded JSON-LD next to the
# other exports; relies on `dirpath` still pointing at the database directory.
with open(f"{dirpath}/allen-ephys-protocols.json", "w") as json_file:
    json_file.write(json.dumps(forge.as_jsonld(nephys_allen_resources, form="expanded")))

Access¶

Set filters¶

In [ ]:

# Search criteria: match resources by their type only.
_type = "NeuronMorphology"
filters = dict(type=_type)

Run Query¶

In [ ]:

# Cap on the number of hits; use `None` to fetch every matching result.
number_of_results = 10

# Run the search with the filters defined above and report how many came back.
data = forge.search(filters, limit=number_of_results)
summary = f"{str(len(data))} dataset(s) of type {_type} found"
print(summary)

Display the results as pandas dataframe¶

In [ ]:

# Properties kept when reshaping the search results for tabular display.
# The original list named "brainLocation.layer.id" and
# "brainLocation.layer.label" twice; each property is now listed once.
property_to_display = [
    "id",
    "name",
    "subject",
    "brainLocation.brainRegion.id",
    "brainLocation.brainRegion.label",
    "brainLocation.layer.id",
    "brainLocation.layer.label",
    "contribution",
    "distribution.name",
    "distribution.contentUrl",
    "distribution.encodingFormat",
]
reshaped_data = forge.reshape(data, keep=property_to_display)

# Last expression of the cell, so the notebook renders the resulting dataframe.
forge.as_dataframe(reshaped_data)

Download¶

In [ ]:

# Download the files referenced by each result's distribution.contentUrl into ./downloaded/.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)

In [ ]:

ls ./downloaded/

Display a result as 3D Neuron Morphology¶

In [ ]:

from neurom import load_morphology
from neurom.view.plotly_impl import plot_morph3d
import IPython

In [ ]:

# Load the first downloaded morphology. `dirpath` already ends with "/", so the
# path is joined with pathlib rather than an f-string to avoid a "//" segment.
# Assumes the download step saved the file under its distribution name — confirm.
morphology_path = Path(dirpath) / data[0].distribution.name
neuron = load_morphology(str(morphology_path))
# With inline=False the figure is not rendered in the cell; presumably it is
# written to ./morphology-3D.html, which is the file displayed below — confirm.
plot_morph3d(neuron, inline=False)
# Last expression of the cell, so the notebook renders the HTML file.
IPython.display.HTML(filename='./morphology-3D.html')

Version the dataset¶

Tagging a dataset is equivalent to git tag: it lets you version a dataset.

In [ ]:

# Tag every search result with the label "releaseV112".
forge.tag(data, value="releaseV112")

In [ ]:

# Pass the tag as the `version` argument to retrieve the first result as it was
# when that tag was applied.

tagged_data = forge.retrieve(id=data[0].id, version="releaseV112")

In [ ]:

# Render the tagged revision as a dataframe (last expression of the cell).
forge.as_dataframe(tagged_data)

In [ ]:

# Change the first result locally so that the update below creates a revision after the tag.
# NOTE(review): the search filters only on type, so data[0] is not necessarily
# an Allen morphology — confirm the description fits the resource.
data[0].description="Neuron Morphology from Allen"

In [ ]:

# Push the modified resource through the forge client.
forge.update(data[0])

In [ ]:

# Retrieve the same resource without a `version` argument, for comparison with the tagged one.
non_tagged_data = forge.retrieve(id=data[0].id)

In [ ]:

# Render the untagged retrieval as a dataframe (last expression of the cell).
forge.as_dataframe(non_tagged_data)

In [ ]: