The BNB Linked Data Platform provides access to the British National Bibliography (BNB) published as linked open data and made available through SPARQL services.
This notebook explains how to query the repository to obtain the places of publication (properties blt:publication and blt:projectedPublication) and show them on an interactive map. Because the works are linked to GeoNames, the records can be connected to external repositories. This notebook retrieves additional information from Wikidata, illustrating the benefits of Linked Open Data.
In this section, you can select an author from the BNB by means of their identifier; the commented-out lines show other authors you can try.
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/DickensCharles1812-1870'
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/BlakeWilliam1757-1827'
bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/WoolfVirginia1882-1941'
#bnbIdAuthor = 'http://bnb.data.bl.uk/id/person/ShakespeareWilliam1564-1616'
import folium
import requests
import pandas as pd
import json
import csv
import matplotlib.pyplot as plt
from pandas import json_normalize  # flattens nested JSON results into a DataFrame
We will use the SPARQL endpoint to run the query, configuring the request so that the results are returned as JSON.
url = 'https://bnb.data.bl.uk/sparql'
query = """
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX bio: <http://purl.org/vocab/bio/0.1/>
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX event: <http://purl.org/NET/c4dm/event.owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX isbd: <http://iflastandards.info/ns/isbd/elements/>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdau: <http://rdaregistry.info/Elements/u/>
PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX umbel: <http://umbel.org/umbel#>
PREFIX schema: <http://schema.org/>
PREFIX c4dm: <http://purl.org/NET/c4dm/event.owl#>
SELECT DISTINCT ?resource ?title ?date ?place WHERE {{
  graph <http://bnb.data.bl.uk/id/graph/BNBCIP> {{
    ?resource ?p <{0}> ;
        dct:title ?title ;
        schema:datePublished ?date .
    OPTIONAL {{
      ?resource blt:projectedPublication ?publication .
      ?publication c4dm:place ?place .
      FILTER regex(str(?place), "geonames", "i")
    }}
    OPTIONAL {{
      ?resource blt:publication ?publication .
      ?publication c4dm:place ?place .
      FILTER regex(str(?place), "geonames", "i")
    }}
  }}
}} LIMIT 500
"""
query = query.format(bnbIdAuthor)
# use json as a result
headers = {'Accept': 'application/sparql-results+json'}
r = requests.get(url, params = {'format': 'application/sparql-results+json', 'query': query}, headers=headers)
print('Elements retrieved!')
print(r.text)
bnbdata = json.loads(r.text)
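Each result row is a binding that maps the selected variables to objects containing a value; the following optional check (a minimal sketch, assuming at least one result was returned) prints the first binding so that the structure used by the CSV export below is visible.
# Inspect the structure of the SPARQL JSON results (optional)
print(len(bnbdata['results']['bindings']), 'bindings retrieved')
print(json.dumps(bnbdata['results']['bindings'][0], indent=2))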
with open('bnb_records.csv', 'w', newline='') as file:
    csv_out = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_out.writerow(['resource', 'place', 'title', 'date'])

    for i in bnbdata['results']['bindings']:
        resource = place = title = date = ''
        resource = i['resource']['value']
        # place is OPTIONAL in the query, so it may be missing from a binding
        if 'place' in i:
            place = i['place']['value']
        title = i['title']['value']
        date = i['date']['value']
        csv_out.writerow([resource, place, title, date])
# Load the CSV file we just created.
# This puts the data in a pandas DataFrame
df = pd.read_csv('bnb_records.csv')
df
# How many items?
len(df)
places_by_number = df.groupby("place")["resource"].count()
places_by_number
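To see which places of publication are the most frequent, the counts can be sorted in descending order (an optional step).
# Show the most frequent places of publication first
places_by_number.sort_values(ascending=False).head(10)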
### We can access the count of a specific place by its GeoNames URI
places_by_number['http://sws.geonames.org/6269131/']
This chart shows the number of resources by date.
ax = df['date'].value_counts().plot(kind='bar',
                                    figsize=(14, 8),
                                    title="Number of resources per date")
ax.set_xlabel("Dates")
ax.set_ylabel("Resources")
plt.show()
# First we create a new column in pandas with the year
df['year'] = pd.DatetimeIndex(df['date']).year
df['year']
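If some of the publication dates are not in a format that pd.DatetimeIndex can parse, the previous cell may raise an error; a more tolerant alternative (a sketch, not part of the original workflow) coerces unparseable values instead.
# Tolerant alternative: unparseable dates become NaT (and the year becomes NaN) instead of raising
df['year'] = pd.to_datetime(df['date'], errors='coerce').dt.year
df['year']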
ax = df['year'].value_counts().plot(kind='bar',
                                    figsize=(14, 8),
                                    title="Number of resources per year")
ax.set_xlabel("Years")
ax.set_ylabel("Resources")
plt.show()
# Get the unique GeoNames URIs (dropping records without a place)
places = pd.unique(df['place'].dropna()).tolist()
strplaces = ''
for a in sorted(places):
    print(a)
    strplaces = strplaces + ' \"' + a.replace("http://sws.geonames.org/", "").replace("/", "") + '\"'
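Printing the resulting string (optional) shows the quoted GeoNames identifiers that will be injected into the VALUES clause of the Wikidata query below.
# Preview the space-separated, quoted GeoNames identifiers
print(strplaces)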
url = 'https://query.wikidata.org/sparql'
query = """
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?idgeonames ?lat ?lon ?x ?xLabel
WHERE {{
  values ?idgeonames {{ {0} }}
  ?x wdt:P1566 ?idgeonames ;
     p:P625 [
       psv:P625 [
         wikibase:geoLatitude ?lat ;
         wikibase:geoLongitude ?lon ;
         wikibase:geoGlobe ?globe ;
       ] ;
       ps:P625 ?coord
     ] .
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
"""
query = query.format(strplaces)
print(query)
# use json as a result
r = requests.get(url, params = {'format': 'json', 'query': query})
geopoints = r.json()
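As an optional check (a minimal sketch using pandas' json_normalize, imported above; the name geo_df is introduced here for illustration), the Wikidata bindings can be flattened into a DataFrame to review the coordinates before drawing the map.
# Flatten the nested JSON bindings into columns such as 'lat.value' and 'lon.value'
geo_df = json_normalize(geopoints['results']['bindings'])
geo_df[['idgeonames.value', 'xLabel.value', 'lat.value', 'lon.value']]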
map = folium.Map(location=[0, 0], zoom_start=1.5)

for geo in geopoints['results']['bindings']:
    idwikidata = geo['x']['value']
    lat = float(geo['lat']['value'])
    lon = float(geo['lon']['value'])
    idgeonames = geo['idgeonames']['value']
    label = geo['xLabel']['value']
    print(lat, lon)

    # add the record count and a link to Wikidata to the popup text
    count = places_by_number['http://sws.geonames.org/' + idgeonames + '/']
    popup = str(count) + " records published in <a href='" + str(idwikidata) + "'>" + label + "</a>"
    folium.Marker([lat, lon], popup=popup).add_to(map)
map
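The interactive map can also be saved as a standalone HTML file (optional; the file name bnb_map.html is just an example) so that it can be shared outside the notebook.
# Save the interactive map to an HTML file that opens in any browser
map.save('bnb_map.html')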