#!/usr/bin/env python
# coding: utf-8

# # Load the RDF data dump

# In[4]:

# NOTE: this file is a notebook export and must be run by IPython/Jupyter.
# The names `pd`, `plt`, `mdates`, `ns` and `g` used below are created by the
# `%%time` cell that follows, not by regular top-level statements.
get_ipython().run_cell_magic(
    'time',
    '',
    '\n'
    'import pandas as pd\n'
    'import matplotlib.pyplot as plt\n'
    'import matplotlib.dates as mdates\n'
    '\n'
    'from rdflib import ConjunctiveGraph\n'
    '\n'
    'ns = {"nb": "http://bise-eu.info/core-ontology#",\n'
    ' "dc": "http://dcterms/",\n'
    ' "p-plan": "http://purl.org/net/p-plan#",\n'
    ' "edam": "http://purl.obolibrary.org/obo/edam#"}\n'
    '\n'
    'g = ConjunctiveGraph()\n'
    '\n'
    '### latest version of the RDF dataset dump\n'
    'g.parse("https://raw.githubusercontent.com/bio-tools/content/master/datasets/bise-ontology-biii-dump.ttl", format="turtle")\n'
    'g.parse("bise-linked-data-webapp/static/data/EDAM-bioimaging_alpha03.owl")\n'
    'print(str(len(g)) + \' triples in Biii data graph\')\n',
)

# One activation of the inline backend is enough for every plot below.
get_ipython().run_line_magic('matplotlib', 'inline')

# NOTE(review): in the version of this script under review every query read
# `?soft a .` with no class after `a`, which is not valid SPARQL; the class
# IRI was most likely lost when the notebook was exported/extracted.  The
# value below is a best guess at the Biii "software" class -- confirm it
# against the RDF dump before trusting the counts.
SOFTWARE_CLASS_IRI = "http://biii.eu/node/software"

# Figure size (inches) shared by all bar charts.
FIGURE_SIZE = (16, 10)

# Patterns requiring a title and a description.
DESCRIBED_PATTERNS = """
    ?soft dc:title ?title .
    ?soft rdfs:comment ?description .
"""

# "Sparse" entries: described, but missing at least one of the listed
# properties.
SPARSE_PATTERNS = DESCRIBED_PATTERNS + """
    FILTER (
        NOT EXISTS {?soft nb:hasImplementation ?i}
        || NOT EXISTS {?soft nb:hasLocation ?loc}
        || NOT EXISTS {?soft nb:hasIllustration ?ill}
        || NOT EXISTS {?soft nb:hasAuthor ?auth}
        || NOT EXISTS {?soft nb:hasFunction ?func}
        || NOT EXISTS {?soft nb:openess ?lic_op}
        || NOT EXISTS {?soft nb:hasLicense ?lic}
        || NOT EXISTS {?soft nb:hasSupportedImageDimension ?dim}
        || NOT EXISTS {?soft nb:requires ?dep}
        || NOT EXISTS {?soft nb:hasProgrammingLanguage ?prog}
        || NOT EXISTS {?soft nb:hasPlatform ?plat}
        || NOT EXISTS {?soft nb:hasTopic ?topic}
        || NOT EXISTS {?soft nb:hasReferencePublication ?ref}
        || NOT EXISTS {?soft nb:hasDocumentation ?doc}
        || NOT EXISTS {?soft nb:hasComparison ?comp}
        || NOT EXISTS {?soft nb:hasDOI ?doi}
        || NOT EXISTS {?soft nb:hasUsageExample ?usag}
        || NOT EXISTS {?soft nb:hasTrainingMaterial ?train}
    )
"""

# "Detailed" entries: described and carrying every property listed here.
DETAILED_PATTERNS = DESCRIBED_PATTERNS + """
    ?soft nb:hasImplementation ?i .
    ?soft nb:hasLocation ?loc .
    ?soft nb:hasIllustration ?ill .
    ?soft nb:hasAuthor ?auth .
    ?soft nb:hasFunction ?func .
    ?soft nb:openess ?lic_op .
    ?soft nb:hasLicense ?lic .
    ?soft nb:hasSupportedImageDimension|nb:requires ?usage .
    ?soft nb:hasProgrammingLanguage ?prog .
    ?soft nb:hasPlatform ?plat .
    ?soft nb:hasTopic ?topic .
"""

# "Comprehensive" entries: everything required of a detailed entry, plus a
# reference/documentation/comparison, a DOI and a usage example or training
# material.
COMPREHENSIVE_PATTERNS = DETAILED_PATTERNS + """
    ?soft nb:hasReferencePublication|nb:hasDocumentation|nb:hasComparison ?ref .
    ?soft nb:hasDOI ?doi .
    ?soft nb:hasUsageExample|nb:hasTrainingMaterial ?train .
"""


def build_date_query(extra_patterns: str = "", *, distinct: bool = True) -> str:
    """Return a SPARQL query selecting ``?soft`` and its ``dc:created`` date.

    Args:
        extra_patterns: Additional graph patterns (triple patterns and/or a
            FILTER) appended inside the WHERE clause.
        distinct: Emit ``SELECT DISTINCT`` when true, plain ``SELECT``
            otherwise.

    Returns:
        The query text; prefixes are expected to be supplied by the caller
        through ``initNs``.
    """
    select = "SELECT DISTINCT" if distinct else "SELECT"
    return "\n".join([
        f"{select} ?soft ?date WHERE {{",
        f"    ?soft a <{SOFTWARE_CLASS_IRI}> .",
        "    ?soft dc:created ?date .",
        extra_patterns,
        "}",
    ])


def plot_cumulative_entries(graph, query: str, init_ns, *, figsize=FIGURE_SIZE):
    """Run *query* and bar-plot the cumulative number of entries per year.

    Prints the number of result rows, then draws the cumulative yearly count
    of ``soft`` values as a bar chart on a new figure.

    Args:
        graph: Object exposing ``query(query, initNs=...)`` whose result rows
            can be indexed with ``'soft'`` and ``'date'``.
        query: SPARQL text binding ``?soft`` and ``?date``.
        init_ns: Prefix-to-namespace mapping passed as ``initNs``.
        figsize: ``(width, height)`` of the figure in inches.

    Returns:
        A DataFrame with columns ``soft`` and ``created``; ``created`` is
        converted to datetimes unless the result is empty.
    """
    rows = [
        {'soft': str(r['soft']), 'created': str(r['date'])}
        for r in graph.query(query, initNs=init_ns)
    ]
    print(len(rows))

    # Naming the columns keeps an empty result from producing a frame without
    # a "created" column (which used to end in a KeyError).
    entries = pd.DataFrame(rows, columns=['soft', 'created'])
    if entries.empty:
        # Nothing to aggregate or draw.
        return entries

    # `astype("datetime64")` (unit-less) is rejected by pandas >= 2.0.
    # utc=True also copes with values carrying different UTC offsets.
    entries['created'] = pd.to_datetime(entries['created'], utc=True)

    per_year = entries.groupby(entries['created'].dt.year)['soft'].count()
    plt.figure(figsize=figsize)
    # With no explicit axes, the plot lands on the figure created just above.
    per_year.cumsum().plot(kind='bar')
    return entries


# # Level-0 entries

# In[7]:

df = plot_cumulative_entries(g, build_date_query(distinct=False), ns)


# # Sparse entries

# In[8]:

df = plot_cumulative_entries(g, build_date_query(SPARSE_PATTERNS), ns)


# # Detailed entries

# In[9]:

# `df` is kept at module level so it can still be inspected interactively.
df = plot_cumulative_entries(g, build_date_query(DETAILED_PATTERNS), ns)


# # Comprehensive entries

# In[11]:

df = plot_cumulative_entries(g, build_date_query(COMPREHENSIVE_PATTERNS), ns)


# In[ ]: