Data management plans (DMPs) are documents accompanying research proposals and project outputs. DMPs are created as free-form text and describe the data and tools employed in scientific investigations. They are often seen as an administrative exercise and not as an integral part of research practice. Machine Actionable DMPs (maDMPs) take this concept further by using persistent identifiers (PIDs) to connect the resources associated with a DMP.
This notebook displays, in a human-friendly way, the connections embedded in a maDMP. By the end of this notebook, you will be able to succinctly display the essential components of the maDMP vision using persistent identifiers (PIDs): Open Researcher and Contributor IDs (ORCIDs), funder IDs, organization IDs (Org IDs), and dataset IDs (DOIs).
import json
import pandas as pd
import numpy as np
from dfply import *
import altair.vega.v5 as alt
# Prepare the GraphQL client
import requests
from IPython.display import display, Markdown
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
# HTTP transport aimed at the DataCite GraphQL endpoint; request payloads are sent as JSON.
_transport = RequestsHTTPTransport(url='https://api.datacite.org/graphql', use_json=True)

# GraphQL client bound to that transport; the schema is fetched through the transport.
client = Client(fetch_schema_from_transport=True, transport=_transport)
We obtain all the data from the DataCite GraphQL API.
# Variables and GraphQL document used to retrieve one data management plan (identified by its DOI)
# together with its referenced datasets, hosting institutions, funding references and creators.
# NOTE(review): the operation below only declares $id, so "maxOutputs" and "minViews" are never
# referenced by the query - presumably left over from another notebook; confirm before removing.
query_params = {
"id" : "https://doi.org/10.5281/zenodo.3372460",
"maxOutputs": 100,
"minViews" : 100
}
# The aliases (datasets, organisations, people, "id: doi", "name: funderName", ...) rename the
# response fields so that every item exposes "id" and "name" keys.
query = gql("""query getOutputs($id: ID!)
{
dataManagementPlan(id: $id) {
id
titles {
title
}
datasets: references(resourceTypeId:"dataset") {
totalCount
nodes {
id: doi
name: titles {
title
}
}
}
organisations: contributors(contributorType: "HostingInstitution") {
id
name
affiliation{
id
}
}
fundingReferences {
id: funderIdentifier
funderIdentifierType
name: funderName
}
people: creators {
id
name
affiliation{
id
}
}
}
}
""")
def get_data():
    """Fetches the data management plan from the GraphQL API

    Runs the module-level ``query`` with ``query_params`` through ``client``.

    Returns:
        dict: The ``dataManagementPlan`` object of the response
    """
    # gql expects the variables as a mapping. Serialising them with json.dumps first
    # sends a JSON-encoded *string* in the "variables" field, which only works when
    # the server is lenient enough to decode it a second time.
    response = client.execute(query, variable_values=query_params)
    return response["dataManagementPlan"]
Simple transformations are performed to convert the GraphQL response into an array that can be consumed by Vega.
def add_node_attributes(dataframe, parent=2):
    """Adds to each item the attributes needed for the node visualisation

    Parameters:
        dataframe (dataframe): A dataframe with all the items, or None
        parent (int): The id of the parent node

    Returns:
        dataframe: A new dataframe with an extra ``parent`` column; an empty
        dataframe when ``dataframe`` is None
    """
    if dataframe is None:
        return pd.DataFrame()
    # assign() returns a new frame, so the caller's dataframe is left untouched.
    # The existing "id" column passes through unchanged.
    return dataframe.assign(parent=parent)
def create_node(array=None, parent=2):
    """Creates the chart nodes for a group of items and formats them

    Parameters:
        array (array): An array with all the items (dicts sharing the same keys);
            None or an empty array yields no nodes
        parent (int): The id of the parent node

    Returns:
        list: One dict per item, each carrying the ``parent`` attribute;
        an empty list when there are no items
    """
    # Always hand back a list so the result can be concatenated with other node lists.
    if array is None or len(array) == 0:
        return []
    # The column order follows the keys of the first item.
    frame = pd.DataFrame(array, columns=list(array[0].keys()))
    return add_node_attributes(frame, parent).to_dict(orient='records')
def merge_nodes(dataset=None, funders=None, orgs=None, people=None):
    """Merges all the node lists under the fixed DMP hierarchy

    The result starts with the root "dmp" node and its four category nodes
    (Datasets, Funders, Organisations, People), followed by the given nodes in
    the order dataset, funders, orgs, people.

    Parameters:
        dataset (array): dataset nodes
        funders (array): funders nodes
        orgs (array): orgs nodes
        people (array): people nodes

    Returns:
        array: Array with all the nodes
    """
    def _or_empty(group):
        # None and any empty container (including the {} an empty lookup may
        # produce) contribute no nodes.
        return [] if group is None or len(group) == 0 else group

    hierarchy = [
        {"id": 1, "name": "dmp"},
        {"id": 2, "name": "Datasets", "parent": 1},
        {"id": 3, "name": "Funders", "parent": 1},
        {"id": 4, "name": "Organisations", "parent": 1},
        {"id": 5, "name": "People", "parent": 1},
    ]
    return (
        hierarchy
        + _or_empty(dataset)
        + _or_empty(funders)
        + _or_empty(orgs)
        + _or_empty(people)
    )
# Fetch the DMP record once, then turn each related group into chart nodes.
# The parent ids match the category nodes created by merge_nodes:
# 2 = Datasets, 3 = Funders, 4 = Organisations, 5 = People.
data = get_data()
datasets = create_node(data["datasets"]["nodes"], parent=2)
orgs = create_node(data["organisations"], parent=4)
people = create_node(data["people"], parent=5)
funders = create_node(data["fundingReferences"], parent=3)

# Flat list of every node (hierarchy first), ready to be serialised for Vega.
nodes = merge_nodes(dataset=datasets, funders=funders, orgs=orgs, people=people)
[] [] [{'id': 'https://orcid.org/0000-0003-0578-6033', 'name': 'Aksetøy, Laila Økdal', 'affiliation': [{'id': None}]}] dict_keys(['id', 'name', 'affiliation']) 1 id name affiliation 0 https://orcid.org/0000-0003-0578-6033 Aksetøy, Laila Økdal [{'id': None}] [{'id': 'https://doi.org/10.13039/501100000780', 'funderIdentifierType': 'Crossref Funder ID', 'name': 'European Commission'}] dict_keys(['id', 'funderIdentifierType', 'name']) 1 id funderIdentifierType \ 0 https://doi.org/10.13039/501100000780 Crossref Funder ID name 0 European Commission
def vega_template(data):
    """Injects data into the vega specification

    The specification is a radial node-link tree: the nodes are stratified on
    their ``id`` / ``parent`` keys and labelled with their ``name`` field.

    Parameters:
        data (str or array): The nodes, either already serialised as a JSON
            string or as a JSON-serialisable array of node dicts

    Returns:
        str: The Vega specification as a JSON string with the nodes embedded
    """
    # Accept the documented array as well as the pre-serialised string; a plain
    # list cannot be concatenated into the template text.
    values = data if isinstance(data, str) else json.dumps(data)
    # The schema version matches the Vega 5 renderer used to display the spec.
    return """
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "An example of a radial layout for a node-link diagram of hierarchical data.",
"width": 720,
"height": 720,
"padding": 5,
"autosize": "none",
"signals": [
{"name": "labels", "value": true, "bind": {"input": "checkbox"}},
{
"name": "radius",
"value": 280,
"bind": {"input": "range", "min": 20, "max": 600}
},
{
"name": "extent",
"value": 360,
"bind": {"input": "range", "min": 0, "max": 360, "step": 1}
},
{
"name": "rotate",
"value": 0,
"bind": {"input": "range", "min": 0, "max": 360, "step": 1}
},
{
"name": "layout",
"value": "tidy",
"bind": {"input": "radio", "options": ["tidy", "cluster"]}
},
{
"name": "links",
"value": "diagonal",
"bind": {
"input": "select",
"options": ["line", "curve", "diagonal", "orthogonal"]
}
},
{"name": "originX", "update": "width / 2"},
{"name": "originY", "update": "height / 2"}
],
"data": [
{
"name": "tree",
"values": """ + values + """,
"transform": [
{"type": "stratify", "key": "id", "parentKey": "parent"},
{
"type": "tree",
"method": {"signal": "layout"},
"size": [1, {"signal": "radius"}],
"as": ["alpha", "radius", "depth", "children"]
},
{
"type": "formula",
"expr": "(rotate + extent * datum.alpha + 270) % 360",
"as": "angle"
},
{"type": "formula", "expr": "PI * datum.angle / 180", "as": "radians"},
{
"type": "formula",
"expr": "inrange(datum.angle, [90, 270])",
"as": "leftside"
},
{
"type": "formula",
"expr": "originX + datum.radius * cos(datum.radians)",
"as": "x"
},
{
"type": "formula",
"expr": "originY + datum.radius * sin(datum.radians)",
"as": "y"
}
]
},
{
"name": "links",
"source": "tree",
"transform": [
{"type": "treelinks"},
{
"type": "linkpath",
"shape": {"signal": "links"},
"orient": "radial",
"sourceX": "source.radians",
"sourceY": "source.radius",
"targetX": "target.radians",
"targetY": "target.radius"
}
]
}
],
"scales": [
{
"name": "color",
"type": "linear",
"range": {"scheme": "viridis"},
"domain": {"data": "tree", "field": "depth"},
"zero": true
}
],
"marks": [
{
"type": "path",
"from": {"data": "links"},
"encode": {
"update": {
"x": {"signal": "originX"},
"y": {"signal": "originY"},
"path": {"field": "path"},
"stroke": {"value": "#ccc"}
}
}
},
{
"type": "symbol",
"from": {"data": "tree"},
"encode": {
"enter": {"size": {"value": 300}, "stroke": {"value": "#fff"}},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"fill": {"scale": "color", "field": "depth"}
}
}
},
{
"type": "text",
"from": {"data": "tree"},
"encode": {
"enter": {
"text": {"field": "name"},
"fontSize": {"value": 12},
"baseline": {"value": "middle"}
},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"dx": {"signal": "(datum.leftside ? -1 : 1) * 12"},
"align": {"signal": "datum.leftside ? 'right' : 'left'"},
"opacity": {"signal": "labels ? 1 : 0"}
}
}
}
]
}
"""
# Serialise the nodes to JSON, inject them into the spec template, parse the resulting
# text back into a dict and hand it to the Vega 5 renderer (alt is altair.vega.v5).
alt.vega(json.loads(vega_template(json.dumps(nodes))))
<Vega 5 object> If you see this message, it means the renderer has not been properly enabled for the frontend that you are using. For more information, see https://altair-viz.github.io/user_guide/troubleshooting.html