The BNB Linked Data Platform provides access to the British National Bibliography (BNB) published as linked open data and made available through SPARQL services.
This notebook explains how to query the repository to obtain the places of publication (properties blt:publication and blt:projectedPublication) and show them on an interactive map. Because the works are linked to GeoNames, the records can be connected to external repositories. This notebook retrieves additional information from Wikidata, showing the benefits of Linked Open Data.
https://github.com/hibernator11/notebook-lod-libraries
https://github.com/hibernator11/notebook-lod-libraries/blob/master/bnb-lod-extraction-map.ipynb
Author: Gustavo Candela (https://github.com/hibernator11), Research and Development department at The Biblioteca Virtual Miguel de Cervantes, University of Alicante, Spain
In this section, you can set the BNB author to analyse by using their identifier; the commented lines show other example authors.
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/DickensCharles1812-1870'
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/BlakeWilliam1757-1827'
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/WoolfVirginia1882-1941'
bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/ShakespeareWilliam1564-1616'
# This is needed for Google Colab; run it once at the start of the session
!pip install --upgrade folium
Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
ERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.11.0 which is incompatible.
Installing collected packages: folium
  Found existing installation: folium 0.8.3
    Uninstalling folium-0.8.3:
      Successfully uninstalled folium-0.8.3
Successfully installed folium-0.11.0
import folium
import requests
import pandas as pd
import json
import csv
import matplotlib.pyplot as plt
from pandas.io.json import json_normalize  # note: deprecated in pandas >= 1.0, where pd.json_normalize is the replacement
We will use the BNB SPARQL endpoint to run the query, configuring the request to retrieve the results as JSON.
url = 'https://bnb.data.bl.uk/sparql'
query = """
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX bio: <http://purl.org/vocab/bio/0.1/>
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX event: <http://purl.org/NET/c4dm/event.owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX isbd: <http://iflastandards.info/ns/isbd/elements/>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdau: <http://rdaregistry.info/Elements/u/>
PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX umbel: <http://umbel.org/umbel#>
PREFIX schema: <http://schema.org/>
PREFIX c4dm: <http://purl.org/NET/c4dm/event.owl#>
SELECT DISTINCT ?resource ?title ?date ?place WHERE {{
graph <http://bnb.data.bl.uk/id/graph/BNBCIP> {{
?resource ?p <{0}> ;
dct:title ?title ;
schema:datePublished ?date .
OPTIONAL {{
?resource blt:projectedPublication ?publication .
?publication c4dm:place ?place .
FILTER regex(?place, "geonames", "i")
}}
OPTIONAL {{
?resource blt:publication ?publication .
?publication c4dm:place ?place .
FILTER regex(?place, "geonames", "i")
}}
}}
}} LIMIT 500
"""
query = query.format(bnbIdAuthor)
# use json as a result
headers = {'Accept': 'application/sparql-results+json'}
r = requests.get(url, params = {'format': 'application/sparql-results+json', 'query': query}, headers=headers)
print('Elements retrieved!')
#print(r.text)
Elements retrieved!
bnbdata = json.loads(r.text)
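Before writing the CSV, it can help to inspect the shape of the response. This optional sanity check is a minimal sketch (not part of the original workflow): each binding maps a variable name to a dictionary with 'type' and 'value' keys.
# Optional sanity check: how many bindings came back, and what one looks like
print(len(bnbdata['results']['bindings']), 'bindings returned')
print(json.dumps(bnbdata['results']['bindings'][0], indent=2))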
# We need to delete bnb_records.csv in case it exists from previous runs.
# The -f flag keeps rm quiet if the file does not exist yet.
!rm -f bnb_records.csv
with open('bnb_records.csv', 'w', newline='') as file:
    csv_out = csv.writer(file, delimiter = ',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    csv_out.writerow(['resource', 'place', 'title', 'date'])
    for i in bnbdata['results']['bindings']:
        resource = i['resource']['value']
        # ?place comes from an OPTIONAL clause, so guard against missing bindings
        place = i['place']['value'] if 'place' in i else ''
        title = i['title']['value']
        date = i['date']['value']
        csv_out.writerow([resource,place,title,date])
print('Open the generated CSV to see all its contents -- Google Colab: under "Files" > click "bnb_records.csv" -- we will also explore it below.')
Open the generated CSV to see all its contents -- Google Colab: under "Files" > click "bnb_records.csv" -- we will also explore it below.
# Load the generated CSV file.
# This puts the data in a Pandas DataFrame
df = pd.read_csv('bnb_records.csv')
df
| | resource | place | title | date |
|---|---|---|---|---|
| 0 | http://bnb.data.bl.uk/id/resource/013310275 | http://sws.geonames.org/6269131/ | Macbeth : teachit KS3 interactive pack | 2006-01 |
| 1 | http://bnb.data.bl.uk/id/resource/013310276 | http://sws.geonames.org/6269131/ | Much ado about nothing : teachit KS3 interacti... | 2006-01 |
| 2 | http://bnb.data.bl.uk/id/resource/013315368 | http://sws.geonames.org/6269131/ | Hamlet | 2006-01 |
| 3 | http://bnb.data.bl.uk/id/resource/019599478 | http://sws.geonames.org/6269131/ | Twelfth night | 2020-01 |
| 4 | http://bnb.data.bl.uk/id/resource/019599479 | http://sws.geonames.org/6269131/ | The tempest : the alexander text | 2019-11 |
| ... | ... | ... | ... | ... |
| 122 | http://bnb.data.bl.uk/id/resource/019702130 | http://sws.geonames.org/6269131/ | The tragedies | 2020-01 |
| 123 | http://bnb.data.bl.uk/id/resource/019702131 | http://sws.geonames.org/6269131/ | The tragicomedies | 2020-01 |
| 124 | http://bnb.data.bl.uk/id/resource/019755998 | http://sws.geonames.org/6269131/ | A midsummer night's dream | 2020-06 |
| 125 | http://bnb.data.bl.uk/id/resource/019736523 | http://sws.geonames.org/6252001/ | William Shakespeare comedies | 2020-05 |
| 126 | http://bnb.data.bl.uk/id/resource/019736524 | http://sws.geonames.org/6252001/ | William Shakespeare tragedies | 2020-05 |

127 rows × 4 columns
# How many items?
len(df)
127
places_by_number = df.groupby("place")["resource"].count()
places_by_number
place
http://sws.geonames.org/1269750/     2
http://sws.geonames.org/2921044/     9
http://sws.geonames.org/6251999/     2
http://sws.geonames.org/6252001/    30
http://sws.geonames.org/6269131/    84
Name: resource, dtype: int64
### We can access the count of a specific place
places_by_number['http://sws.geonames.org/6269131/']
84
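Since places_by_number is an ordinary Pandas Series, it can also be charted directly. This is an optional sketch, reusing the plotting style of the charts below:
# Optional: visualise the distribution of publication places
ax = places_by_number.sort_values(ascending=False).plot(kind='bar',
                                                        figsize=(10,6),
                                                        title="Number of resources per place")
ax.set_xlabel("GeoNames place")
ax.set_ylabel("Resources")
plt.show()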
This chart shows the number of resources by date.
ax = df['date'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Number of resources per date")
ax.set_xlabel("Dates")
ax.set_ylabel("Resources")
plt.show()
# First we create a new column in pandas with the year
df['year'] = pd.DatetimeIndex(df['date']).year
df['year']
0      2006
1      2006
2      2006
3      2020
4      2019
       ...
122    2020
123    2020
124    2020
125    2020
126    2020
Name: year, Length: 127, dtype: int64
df['year'].value_counts()
2018    34
2017    26
2019    19
2020    11
2009     7
2016     5
2006     4
2012     3
2011     3
2014     2
2002     2
2007     2
2003     1
2000     1
2001     1
2010     1
2004     1
2008     1
2013     1
2015     1
1999     1
Name: year, dtype: int64
ax = df['year'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Number of resources per year")
ax.set_xlabel("Years")
ax.set_ylabel("Resources")
plt.show()
# Get unique values
places = pd.unique(df['place']).tolist()
strplaces = ''
for a in sorted(places):
    print(a)
    strplaces = strplaces + ' \"' + a.replace("http://sws.geonames.org/", "").replace("/", "") + '\"'
http://sws.geonames.org/1269750/
http://sws.geonames.org/2921044/
http://sws.geonames.org/6251999/
http://sws.geonames.org/6252001/
http://sws.geonames.org/6269131/
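As a quick optional check, printing strplaces shows the space-separated, quoted GeoNames identifiers that will be injected into the VALUES clause of the Wikidata query below:
# Optional: show the string that will be substituted into the VALUES clause
print(strplaces)
# -> "1269750" "2921044" "6251999" "6252001" "6269131"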
url = 'https://query.wikidata.org/sparql'
query = """
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?idgeonames ?lat ?lon ?x ?xLabel
WHERE {{
values ?idgeonames {{ {0} }}
?x wdt:P1566 ?idgeonames ;
p:P625 [
psv:P625 [
wikibase:geoLatitude ?lat ;
wikibase:geoLongitude ?lon ;
wikibase:geoGlobe ?globe ;
];
ps:P625 ?coord
]
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
"""
query = query.format(strplaces)
print(query)
# use json as a result
r = requests.get(url, params = {'format': 'json', 'query': query})
geopoints = r.json()
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?idgeonames ?lat ?lon ?x ?xLabel
WHERE {
values ?idgeonames { "1269750" "2921044" "6251999" "6252001" "6269131" }
?x wdt:P1566 ?idgeonames ;
p:P625 [
psv:P625 [
wikibase:geoLatitude ?lat ;
wikibase:geoLongitude ?lon ;
wikibase:geoGlobe ?globe ;
];
ps:P625 ?coord
]
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
geopoints
{'head': {'vars': ['idgeonames', 'lat', 'lon', 'x', 'xLabel']},
 'results': {'bindings': [
   {'idgeonames': {'type': 'literal', 'value': '2921044'},
    'lat': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '51.0'},
    'lon': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '10.0'},
    'x': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q183'},
    'xLabel': {'type': 'literal', 'value': 'Germany', 'xml:lang': 'en'}},
   {'idgeonames': {'type': 'literal', 'value': '1269750'},
    'lat': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '22.8'},
    'lon': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '83.0'},
    'x': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q668'},
    'xLabel': {'type': 'literal', 'value': 'India', 'xml:lang': 'en'}},
   {'idgeonames': {'type': 'literal', 'value': '6269131'},
    'lat': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '53.0'},
    'lon': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '-1.0'},
    'x': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q21'},
    'xLabel': {'type': 'literal', 'value': 'England', 'xml:lang': 'en'}},
   {'idgeonames': {'type': 'literal', 'value': '6251999'},
    'lat': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '56.0'},
    'lon': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '-109.0'},
    'x': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q16'},
    'xLabel': {'type': 'literal', 'value': 'Canada', 'xml:lang': 'en'}},
   {'idgeonames': {'type': 'literal', 'value': '6252001'},
    'lat': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '39.828175'},
    'lon': {'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'type': 'literal', 'value': '-98.5795'},
    'x': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q30'},
    'xLabel': {'type': 'literal', 'value': 'United States of America', 'xml:lang': 'en'}}]}}
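The raw JSON can also be flattened into a DataFrame with json_normalize (imported earlier), which makes the coordinates easier to read as a table. A minimal optional sketch:
# Flatten the Wikidata bindings into a table, keeping only the 'value' fields
geo_df = json_normalize(geopoints['results']['bindings'])
geo_df[['idgeonames.value', 'xLabel.value', 'lat.value', 'lon.value', 'x.value']]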
map = folium.Map(location=[0, 0], zoom_start=1.5)

for geo in geopoints['results']['bindings']:
    idwikidata = geo['x']['value']
    # cast the coordinates to float, since SPARQL results arrive as strings
    lat = float(geo['lat']['value'])
    lon = float(geo['lon']['value'])
    idgeonames = geo['idgeonames']['value']
    label = geo['xLabel']['value']
    # print(lat, lon)

    # adding a text to the popup
    count = places_by_number['http://sws.geonames.org/' + idgeonames + '/']

    # records published in that country -- obtain the titles for the map popup
    df_local = df[df['place'] == 'http://sws.geonames.org/' + idgeonames + '/']
    popuptext = str(count) + " records published in <a href='" + str(idwikidata) + "'>" + label + "</a><p/>Title(s):<br/><br/> * " + '<br/> * '.join(df_local['title'].values)
    popup = folium.Popup(popuptext, max_width=800, min_width=200)
    folium.Marker([lat, lon], popup=popup).add_to(map)

map
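The map renders inline at the end of the notebook. If you want to keep it, a folium map can also be written out as a standalone HTML file (the file name here is arbitrary), for example:
# Save the interactive map to a file that opens in any browser
map.save('bnb_map.html')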