#!/usr/bin/env python
# coding: utf-8

# # A GLAM data workbench for reluctant researchers
#
# #### Tim Sherratt (@wragge)

# Think of a word... any word.

# Lots of interesting cultural heritage data, but...

# Do we have the skills?

# * [The Carpentries](https://carpentries.org/)
# * [The Programming Historian](https://programminghistorian.org/)

# How do we deliver **what’s needed, when it’s needed**?

# In[ ]:


from urllib.parse import quote_plus

import requests

from credentials import API_KEY

# Endpoint shared by every DigitalNZ query in this notebook.
API_URL = 'http://api.digitalnz.org/v3/records.json'
# Seconds to wait for the API before giving up instead of hanging the cell.
REQUEST_TIMEOUT = 30


def search_digitalnz(query_params):
    """Send a records search to the DigitalNZ API and return the response.

    Raises ``requests.HTTPError`` on a 4xx/5xx reply so that a failed
    request is reported here rather than as a confusing ``KeyError`` when
    the JSON is unpacked later.
    """
    api_response = requests.get(
        API_URL, params=query_params, timeout=REQUEST_TIMEOUT
    )
    api_response.raise_for_status()
    return api_response


# An empty search text combined with the 'decade' facet: the result count
# printed below is read from data['search']['result_count'].
response = search_digitalnz({
    'api_key': API_KEY,
    'text': '',
    'facets': 'decade',
    'facets_per_page': 25,
})
data = response.json()
print(' There are {:,} items in DigitalNZ!'.format(data['search']['result_count']))


# In[ ]:


import pandas as pd
import altair as alt

alt.renderers.enable('notebook')


def facets_to_df(facet, label):
    """Turn a facet mapping into a two-column DataFrame.

    The mapping's keys become the column named *label* and its values
    become the 'count' column.
    """
    frame = pd.Series(facet).to_frame().reset_index()
    frame.columns = [label, 'count']
    return frame


decades = data['search']['facets']['decade']
decades_df = facets_to_df(decades, 'decade')


# In[ ]:


alt.Chart(decades_df).mark_bar().encode(
    x='decade:O',
    y='count:Q',
    tooltip=alt.Tooltip('count', format=','),
)


# * [Jupyter](https://jupyter.org/)
# * [Binder](https://mybinder.org/)
# * [Exploring the Te Papa collection API](https://nbviewer.jupyter.org/github/GLAM-Workbench/te-papa-api/blob/master/Exploring-the-Te-Papa-collection-API.ipynb)

# In[ ]:


params = {
    'api_key': API_KEY,
    'text': 'possum OR opossum',
    'and[display_collection][]': 'Papers Past',
    'facets': 'year,collection',
    'facets_per_page': 100,
}
response = search_digitalnz(params)
data = response.json()


# In[ ]:


titles = data['search']['facets']['collection']
titles_df = facets_to_df(titles, 'title')
titles_df.head()


# In[ ]:


years = data['search']['facets']['year']
years_df = facets_to_df(years, 'year')

# Build one Papers Past search link per row, limited to that row's year.
# The search text is URL-encoded because it contains spaces.
PAPERS_PAST_SEARCH = (
    'https://paperspast.natlib.govt.nz/newspapers'
    '?query={0}&start_date=01-01-{1}&end_date=31-12-{1}'
)
encoded_query = quote_plus(params['text'])
years_df['url'] = [
    PAPERS_PAST_SEARCH.format(encoded_query, year)
    for year in years_df['year']
]
years_df.head()


# In[ ]:


# Line chart of results per year; each point's href is that row's 'url'.
c1 = alt.Chart(years_df, width=600).mark_line(point=True).encode(
    x='year(year):T',
    y='count:Q',
    tooltip=[
        alt.Tooltip('year(year):T', title='year'),
        alt.Tooltip('count', format=','),
    ],
    href='url:N',
).properties(
    height=300,
    width=500,
)

# Bar chart of rows 1-10 of the collection facet; row 0 is skipped.
# NOTE(review): presumably row 0 is the 'Papers Past' collection itself
# (the filter applied to the search) -- confirm against the API output.
c2 = alt.Chart(titles_df[1:11]).mark_bar().encode(
    x='count:Q',
    y='title:O',
    tooltip=alt.Tooltip('count', format=','),
).properties(
    height=300,
    width=200,
)

# Show the two charts side by side.
c1 | c2


# * [Harvest data from Papers Past](https://nbviewer.jupyter.org/github/GLAM-Workbench/digitalnz/blob/master/Harvest-data-from-PapersPast.ipynb)
# * [GLAM Workbench](https://github.com/GLAM-Workbench)