Load the RDF data dump

In [16]:
%%time

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from rdflib import ConjunctiveGraph

ns = {"nb": "http://bise-eu.info/core-ontology#",
      "dc": "http://dcterms/",
      "p-plan": "http://purl.org/net/p-plan#",
      "edam": "http://purl.obolibrary.org/obo/edam#"}

g = ConjunctiveGraph()
#g.parse("bise-linked-data-webapp/static/data/neubias-dump-20180129.ttl", format="turtle")
#g.parse("data-dumps/latest/neubias-latest.ttl", format="turtle")

### latest version of the RDF dataset dump
g.parse("https://github.com/NeuBIAS/bise-core-ontology/raw/master/data-dumps/latest/neubias-latest.ttl", format="turtle")
g.parse("bise-linked-data-webapp/static/data/EDAM-bioimaging_alpha03.owl")
print(str(len(g)) + ' triples in Biii data graph')
21713 triples in Biii data graph
CPU times: user 1.83 s, sys: 26.4 ms, total: 1.85 s
Wall time: 3.17 s

Level-0 entries

In [13]:
%matplotlib inline

#plt.figure(figsize=(16, 10))
plt.figure(figsize=(9, 6))

soft_dates = []
date_query = """
SELECT ?soft ?date WHERE {
    ?soft a <http://biii.eu/node/software> .
    ?soft dc:created ?date .
}
"""
results = g.query(date_query, initNs=ns)
for r in results:
    soft_dates.append({'soft':r['soft'], 'created':r['date']})

print(len(soft_dates))

df = pd.DataFrame(soft_dates)
df["created"] = df["created"].astype("datetime64")

#df2 = df.groupby([df["created"].dt.year, df["created"].dt.month]).count()
df2 = df.groupby([df["created"].dt.year]).count()

df2['soft'].cumsum().plot(kind='bar')
#df2['soft'].cumsum()
1310
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b4c1be0>

Sparse entries

In [14]:
#%%time
%matplotlib inline

#plt.figure(figsize=(16, 10))
plt.figure(figsize=(9, 6))

soft_dates = []
date_query = """
SELECT DISTINCT ?soft ?date WHERE {
    ?soft a <http://biii.eu/node/software> .
    ?soft dc:created ?date .
    ?soft dc:title ?title .
    ?soft rdfs:comment ?description .
    FILTER (
        NOT EXISTS {?soft nb:hasImplementation ?i} 
        || NOT EXISTS {?soft nb:hasLocation ?loc} 
        || NOT EXISTS {?soft nb:hasIllustration ?ill} 
        || NOT EXISTS {?soft nb:hasAuthor ?auth}
        || NOT EXISTS {?soft nb:hasFunction ?func}
        || NOT EXISTS {?soft nb:openess ?lic_op}
        || NOT EXISTS {?soft nb:hasLicense ?lic}
        || NOT EXISTS {?soft nb:hasSupportedImageDimension ?dim}
        || NOT EXISTS {?soft nb:requires ?dep}
        || NOT EXISTS {?soft nb:hasProgrammingLanguage ?prog}
        || NOT EXISTS {?soft nb:hasPlatform ?plat}
        || NOT EXISTS {?soft nb:hasTopic ?topic}
        || NOT EXISTS {?soft nb:hasReferencePublication ?ref}
        || NOT EXISTS {?soft nb:hasDocumentation ?doc}
        || NOT EXISTS {?soft nb:hasComparison ?comp}
        || NOT EXISTS {?soft nb:hasDOI ?doi}
        || NOT EXISTS {?soft nb:hasUsageExample ?usag}
        || NOT EXISTS {?soft nb:hasTrainingMaterial ?train}
    )
}
"""
results = g.query(date_query, initNs=ns)
for r in results:
    soft_dates.append({'soft':r['soft'], 'created':r['date']})

print(len(soft_dates))
df = pd.DataFrame(soft_dates)
df["created"] = df["created"].astype("datetime64")

#df2 = df.groupby([df["created"].dt.year, df["created"].dt.month]).count()
df2 = df.groupby([df["created"].dt.year]).count()
#df2

df2['soft'].cumsum().plot(kind='bar')
#df2['soft'].cumsum()
758
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b49b5c0>

Detailed entries

In [11]:
%matplotlib inline

#plt.figure(figsize=(16, 10))
plt.figure(figsize=(9, 6))

soft_dates = []
date_query = """
SELECT DISTINCT ?soft ?date WHERE {
    ?soft a <http://biii.eu/node/software> .
    ?soft dc:created ?date .
    ?soft dc:title ?title .
    ?soft rdfs:comment ?description .
    ?soft nb:hasImplementation ?i .
    ?soft nb:hasLocation ?loc .
    ?soft nb:hasIllustration ?ill .
    ?soft nb:hasAuthor ?auth .
    ?soft nb:hasFunction ?func .
    ?soft nb:openess ?lic_op .
    ?soft nb:hasLicense ?lic .
    
    ?soft nb:hasSupportedImageDimension|nb:requires ?usage .
    
    ?soft nb:hasProgrammingLanguage ?prog .
    ?soft nb:hasPlatform ?plat .
    ?soft nb:hasTopic ?topic .

}
"""
results = g.query(date_query, initNs=ns)
for r in results:
    soft_dates.append({'soft':r['soft'], 'created':r['date']})

print(len(soft_dates))

df = pd.DataFrame(soft_dates)
df["created"] = df["created"].astype("datetime64")

#df2 = df.groupby([df["created"].dt.year, df["created"].dt.month]).count()
df2 = df.groupby([df["created"].dt.year]).count()

df2['soft'].cumsum().plot(kind='bar')
#df2['soft'].cumsum()

df
58
Out[11]:
created soft
0 2018-10-17 12:46:16 http://biii.eu/node/1357
1 2018-02-04 15:23:45 http://biii.eu/node/1220
2 2018-01-28 12:24:07 http://biii.eu/node/1171
3 2018-10-18 11:59:10 http://biii.eu/node/1374
4 2019-04-18 14:46:49 http://biii.eu/node/1556
5 2018-01-30 16:57:04 http://biii.eu/node/1203
6 2018-06-29 12:22:13 http://biii.eu/node/1332
7 2019-03-08 11:11:52 http://biii.eu/node/1470
8 2018-01-28 12:39:13 http://biii.eu/node/1175
9 2019-10-16 14:44:37 http://biii.eu/node/1586
10 2018-10-18 12:18:22 http://biii.eu/node/1375
11 2018-05-18 12:14:09 http://biii.eu/node/1301
12 2017-09-13 17:08:30 http://biii.eu/node/1141
13 2018-12-10 16:43:15 http://biii.eu/node/1403
14 2018-10-18 15:27:00 http://biii.eu/node/1380
15 2018-01-28 11:29:20 http://biii.eu/node/1169
16 2016-11-15 14:48:19 http://biii.eu/node/165
17 2013-10-11 16:51:29 http://biii.eu/node/73
18 2018-12-09 18:41:11 http://biii.eu/node/1397
19 2019-02-03 15:32:21 http://biii.eu/node/1430
20 2018-01-30 13:27:24 http://biii.eu/node/1196
21 2018-10-19 10:39:35 http://biii.eu/node/1382
22 2018-01-30 17:04:02 http://biii.eu/node/1204
23 2018-01-28 19:22:05 http://biii.eu/node/1190
24 2018-02-23 12:01:16 http://biii.eu/node/1225
25 2017-09-12 01:23:19 http://biii.eu/node/63
26 2018-01-28 12:38:33 http://biii.eu/node/1178
27 2013-10-21 13:52:50 http://biii.eu/node/93
28 2013-10-11 15:08:41 http://biii.eu/node/475
29 2019-02-06 09:45:27 http://biii.eu/node/1442
30 2014-12-09 17:50:19 http://biii.eu/node/304
31 2019-03-22 18:07:48 http://biii.eu/node/1490
32 2018-05-25 09:52:09 http://biii.eu/node/1313
33 2013-10-11 15:08:46 http://biii.eu/node/883
34 2018-05-19 11:28:05 http://biii.eu/node/1302
35 2018-03-20 17:53:11 http://biii.eu/node/1276
36 2017-02-13 13:24:29 http://biii.eu/node/1089
37 2019-01-02 10:09:35 http://biii.eu/node/1412
38 2013-10-11 15:08:40 http://biii.eu/node/330
39 2014-12-09 15:27:50 http://biii.eu/node/130
40 2018-01-30 16:32:42 http://biii.eu/node/1199
41 2019-03-15 12:31:40 http://biii.eu/node/1486
42 2018-10-17 16:40:37 http://biii.eu/node/1358
43 2013-10-11 15:08:40 http://biii.eu/node/324
44 2018-01-28 12:13:35 http://biii.eu/node/1172
45 2013-10-11 15:08:41 http://biii.eu/node/444
46 2017-09-11 12:49:04 http://biii.eu/node/61
47 2018-12-20 19:09:36 http://biii.eu/node/1411
48 2019-01-11 15:22:59 http://biii.eu/node/1415
49 2018-12-09 19:48:01 http://biii.eu/node/1402
50 2017-09-12 02:57:12 http://biii.eu/node/65
51 2018-10-17 16:59:34 http://biii.eu/node/1359
52 2018-01-28 12:37:30 http://biii.eu/node/1184
53 2018-10-18 10:53:18 http://biii.eu/node/1370
54 2013-10-15 20:00:59 http://biii.eu/node/77
55 2018-12-08 17:38:48 http://biii.eu/node/1392
56 2018-01-28 12:18:13 http://biii.eu/node/1179
57 2018-06-04 15:07:29 http://biii.eu/node/1325

Comprehensive entries

In [15]:
%matplotlib inline

#plt.figure(figsize=(16, 10))
plt.figure(figsize=(9, 6))

soft_dates = []
date_query = """
SELECT DISTINCT ?soft ?date WHERE {
    ?soft a <http://biii.eu/node/software> .
    ?soft dc:created ?date .
    ?soft dc:title ?title .
    ?soft rdfs:comment ?description .
    ?soft nb:hasImplementation ?i .
    ?soft nb:hasLocation ?loc .
    ?soft nb:hasIllustration ?ill .
    ?soft nb:hasAuthor ?auth .
    ?soft nb:hasFunction ?func .
    ?soft nb:openess ?lic_op .
    ?soft nb:hasLicense ?lic .
    
    ?soft nb:hasSupportedImageDimension|nb:requires ?usage .
    
    ?soft nb:hasProgrammingLanguage ?prog .
    ?soft nb:hasPlatform ?plat .
    ?soft nb:hasTopic ?topic .
    
    ?soft nb:hasReferencePublication|nb:hasDocumentation|nb:hasComparison ?ref .
    ?soft nb:hasDOI ?doi .
    ?soft nb:hasUsageExample|nb:hasTrainingMaterial ?train
    
}
"""
results = g.query(date_query, initNs=ns)
for r in results:
    soft_dates.append({'soft':r['soft'], 'created':r['date']})

print(len(soft_dates))

df = pd.DataFrame(soft_dates)
df["created"] = df["created"].astype("datetime64")

#df2 = df.groupby([df["created"].dt.year, df["created"].dt.month]).count()
df2 = df.groupby([df["created"].dt.year]).count()

df2['soft'].cumsum().plot(kind='bar')
#df2['soft'].cumsum()

df
2
Out[15]:
created soft
0 2018-12-09 18:41:11 http://biii.eu/node/1397
1 2018-03-20 17:53:11 http://biii.eu/node/1276