A GLAM data workbench for reluctant researchers

Tim Sherratt (@wragge)

Think of a word... any word.

Lots of interesting cultural heritage data, but...

Do we have the skills?

How do we deliver what’s needed, when it’s needed?

In [ ]:
import requests
from credentials import API_KEY
# Query the DigitalNZ records API with an empty 'text' search and ask for
# facet counts by decade (up to 25 facet values). The printed message below
# treats the resulting result_count as the size of the whole collection.
response = requests.get(
    'http://api.digitalnz.org/v3/records.json',
    params={
        'api_key': API_KEY,
        'text': '',
        'facets': 'decade',
        'facets_per_page': 25,
    },
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=30,
)
# Surface HTTP errors (bad API key, server fault) here rather than as a
# confusing KeyError / JSON decode error further down.
response.raise_for_status()
data = response.json()
print(' There are {:,} items in DigitalNZ!'.format(data['search']['result_count']))
In [ ]:
import pandas as pd
import altair as alt
# Select Altair's 'notebook' renderer for chart output.
alt.renderers.enable('notebook')

# Turn the decade facet returned by the API into a two-column dataframe:
# the facet keys become 'decade', the facet values become 'count'.
decades = data['search']['facets']['decade']
decades_df = (
    pd.Series(decades)
    .rename_axis('decade')
    .reset_index(name='count')
)
In [ ]:
# Bar chart of the facet counts: one bar per decade (ordinal x axis),
# bar height is the count, and the tooltip shows the count with
# thousands separators.
alt.Chart(decades_df).mark_bar().encode(
    x=alt.X('decade:O'),
    y=alt.Y('count:Q'),
    tooltip=alt.Tooltip('count', format=','),
)
In [ ]:
# Search parameters: look for 'possum OR opossum', restricted to records whose
# display_collection is 'Papers Past', and request facet counts by year and by
# collection (up to 100 values per facet).
params = {
    'api_key': API_KEY,
    'text': 'possum OR opossum',
    'and[display_collection][]': 'Papers Past',
    'facets': 'year,collection',
    'facets_per_page': 100
}
response = requests.get(
    'http://api.digitalnz.org/v3/records.json',
    params=params,
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    timeout=30,
)
# Surface HTTP errors (bad API key, server fault) here rather than as a
# confusing KeyError / JSON decode error in the cells that read `data`.
response.raise_for_status()
data = response.json()
In [ ]:
# Turn the 'collection' facet into a two-column dataframe: facet keys become
# 'title', facet values become 'count'.
titles = data['search']['facets']['collection']
titles_df = (
    pd.Series(titles)
    .rename_axis('title')
    .reset_index(name='count')
)
# Preview the first few rows.
titles_df.head()
In [ ]:
from urllib.parse import quote_plus

# Turn the 'year' facet into a two-column dataframe: facet keys become 'year',
# facet values become 'count'.
years = data['search']['facets']['year']
years_df = pd.Series(years).to_frame().reset_index()
years_df.columns = ['year', 'count']

# Build one Papers Past search link per row, limited to that row's own year
# (1 January to 31 December). Previously every row received the same link,
# built from the first row's year only, and an empty facet raised on the
# [0] lookup.
url_template = 'https://paperspast.natlib.govt.nz/newspapers?query={0}&start_date=01-01-{1}&end_date=31-12-{1}'
# The search text contains spaces, so encode it before placing it in a URL.
encoded_query = quote_plus(params['text'])
years_df['url'] = [
    url_template.format(encoded_query, year) for year in years_df['year']
]
# Preview the first few rows.
years_df.head()
In [ ]:
# Line chart (with point markers) of matching articles per year. Each point
# links to the address in that row's 'url' column.
# The chart size is set once, in .properties(); the original also passed
# width=600 to alt.Chart(), which the later width=500 overrode.
c1 = alt.Chart(years_df).mark_line(point=True).encode(
    x = 'year(year):T',
    y = 'count:Q',
    tooltip = [alt.Tooltip('year(year):T', title='year'), alt.Tooltip('count', format=',')],
    href='url:N'
).properties(
    height=300,
    width=500
)

# Horizontal bar chart of counts per title, using rows 1-10 of titles_df.
# NOTE(review): row 0 is deliberately skipped by the [1:11] slice — presumably
# it is an aggregate entry rather than an individual title; confirm.
c2 = alt.Chart(titles_df[1:11]).mark_bar().encode(
    x = 'count:Q',
    y = 'title:O',
    tooltip = alt.Tooltip('count', format=',')
).properties(
    height=300,
    width=200
)

# Show the two charts side by side.
c1 | c2