A subset of the digitised newspapers in Papers Past can be searched through DigitalNZ. Using data from the DigitalNZ API, we can look at what's available.
# This cell just sets up some stuff that we'll need later
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import pandas as pd
from IPython.display import display, FileLink
from pathlib import Path
import altair as alt
# Use Altair's default data transformer, with its maximum-rows check disabled
alt.data_transformers.enable('default')
alt.data_transformers.disable_max_rows()

# Shared HTTP session: requests that come back with a 502, 503 or 504 status
# are retried (up to 5 times, with a backoff factor of 1).
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
# Mount the retrying adapter for both schemes so that requests are retried
# whether the API is addressed over http or https.
retry_adapter = HTTPAdapter(max_retries=retries)
s.mount('https://', retry_adapter)
s.mount('http://', retry_adapter)
# Make links in Altair open in a new tab
def blank_href():
    """Return the Altair theme configuration used to open links in a new tab.

    The theme consists solely of embed options that set the loader's link
    target to ``_blank``.
    """
    return {
        "usermeta": {
            "embedOptions": {
                'loader': {'target': '_blank'}
            }
        }
    }
# Give the custom theme a name when handing it to Altair...
_theme_name = 'blank_href'
alt.themes.register(_theme_name, blank_href)
# ...and then switch it on by that same name.
alt.themes.enable(_theme_name)
# DigitalNZ records endpoint. Addressed over https so that the API key is not
# sent in clear text and the session's 'https://' retry adapter applies.
API_URL = 'https://api.digitalnz.org/v3/records.json'

# Paste your API key between the quotes
# You might need to trim off any spaces at the beginning and end
API_KEY = 'YOUR API KEY'

# Query parameters shared by the requests below: restrict results to
# Papers Past and ask for the year, collection and placename facets.
# Only the facet counts are read from the responses, hence per_page is 0.
params = {
    'and[primary_collection][]': 'Papers Past',
    'facets': 'year,collection,placename',
    'facets_per_page': 350,
    'per_page': 0,
    'api_key': API_KEY,
}
Hover for details. Click to search for articles in DigitalNZ.
# Fetch the facet counts for the whole Papers Past collection
response = s.get(API_URL, params=params)
# Stop with a clear HTTP error (for example when the API key is rejected)
# rather than a confusing KeyError while picking the response apart below.
response.raise_for_status()
data = response.json()
facets = data['search']['facets']

# Totals per year; the facet keys are converted to integers
years = [{'year': int(k), 'total': v} for k, v in facets['year'].items()]

# Totals per collection. The 'Papers Past' entry is dropped if present --
# presumably it is the parent collection rather than a newspaper title.
titles = facets['collection']
titles.pop('Papers Past', None)
# Fill in any missing years
df_years = pd.DataFrame(years).set_index('year')
min_year = int(df_years.index.min())
max_year = int(df_years.index.max())
# A range is already in ascending order, so it needs no extra sorting
idx = list(range(min_year, max_year + 1))
# Years without a facet entry get a total of 0 (rather than NaN), in line
# with how gaps are filled for the individual newspapers.
df_years = df_years.reindex(idx, fill_value=0).reset_index()

# Add a url to search in DigitalNZ
df_years['url'] = df_years['year'].apply(lambda x: f'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]={x}#/')
# Bar chart of totals per year. Each bar carries a tooltip and links to the
# corresponding DigitalNZ search through the 'url' column.
alt.Chart(df_years).mark_bar().encode(
    x=alt.X('year:O'),
    y=alt.Y('total:Q'),
    href=alt.Href('url:N'),
    tooltip=[
        alt.Tooltip('year:N'),
        alt.Tooltip('total:Q', format=','),
    ],
).properties(
    width=800,
)
Hover for details. Click to search for articles in DigitalNZ.
from urllib.parse import quote

title_dfs = []
# Every newspaper is laid out over the same span of years as the full
# collection, so the range is built once rather than on every iteration.
idx = list(range(min_year, max_year + 1))

# Loop through titles to download year facets
for title in titles:
    # Build a per-title query instead of modifying the shared `params`, so
    # that re-running the collection-wide request is not silently filtered
    # by whichever title was processed last.
    title_params = {**params, 'and[collection][]': title}
    response = s.get(API_URL, params=title_params)
    # Surface HTTP failures here rather than as a KeyError further down
    response.raise_for_status()
    data = response.json()
    totals_by_year = {int(k): v for k, v in data['search']['facets']['year'].items()}

    # Fill in missing years with 0. Looking each year up directly also copes
    # with a title that has no year facets at all.
    df_title = pd.DataFrame({
        'year': idx,
        'total': [totals_by_year.get(year, 0) for year in idx],
    })

    # Add newspaper name
    df_title['newspaper'] = title

    # Add url to search in DigitalNZ. The title is percent-encoded so that
    # spaces and characters such as '&' or '#' cannot break the query string.
    encoded_title = quote(title, safe='')
    df_title['url'] = [
        f'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]={year}&i[collection]={encoded_title}#/'
        for year in idx
    ]
    title_dfs.append(df_title)

df_all = pd.concat(title_dfs)
# One small bar chart per newspaper, arranged in a single column. Each facet
# gets its own y scale, and the view outline is removed.
alt.Chart(df_all).mark_bar().encode(
    x='year:O',
    y=alt.Y('total:Q', title=None),
    facet=alt.Facet('newspaper:N', columns=1, title=None),
    href=alt.Href('url:N'),
    tooltip=[
        alt.Tooltip('newspaper'),
        alt.Tooltip('year'),
        alt.Tooltip('total', format=','),
    ],
).properties(
    width=800,
    height=50,
).resolve_scale(
    y='independent',
).configure_view(
    strokeWidth=0,
)
Created by Tim Sherratt for the GLAM Workbench. Support this project by becoming a GitHub sponsor.