Build a DigitalNZ API search query

You can experiment with the DigitalNZ search API by entering values in the form below. For more information on the available parameters, see the API documentation.

In [329]:
import requests
import ipywidgets as widgets
from IPython.display import HTML, Markdown
import ipyvuetify as v
import json
import IPython
from pathlib import Path
import pandas as pd
import pprint

# Display code highlighting properly
def display_source(code, language='json'):
    '''
    Wrap ``code`` in an ``IPython.display.Code`` object for syntax-highlighted display.

    Parameters:
        code: the source text to highlight.
        language: name of the language used to pick the lexer (default 'json').

    Returns:
        An ``IPython.display.Code`` instance holding ``code``.

    Side effect: every call replaces ``IPython.display.Code._repr_html_`` with a
    version that emits style rules for the 'jp-RenderedHTML' class in addition to
    'output_html'.
    '''
    def _jupyterlab_repr_html_(self):
        from pygments import highlight
        from pygments.formatters import HtmlFormatter

        formatter = HtmlFormatter()
        # Emit the same style rules once per CSS class the output may carry.
        css_rules = "\n".join(
            formatter.get_style_defs(selector)
            for selector in (".output_html", ".jp-RenderedHTML")
        )
        highlighted = highlight(self.data, self._get_lexer(), formatter)
        return f"<style>{css_rules}</style>" + highlighted

    IPython.display.Code._repr_html_ = _jupyterlab_repr_html_
    return IPython.display.Code(data=code, language=language)

# Endpoint that every search request built by this notebook is sent to.
# NOTE(review): this is plain HTTP and the api_key is passed as a request
# parameter -- consider https if the endpoint supports it (TODO confirm).
API_URL = 'http://api.digitalnz.org/v3/records.json'
In [326]:
# LOAD FACET DATA

def load_facet(facet):
    '''
    Load the known values of a facet from ``facets/<facet>.csv``.

    Parameters:
        facet: facet name; also the stem of the CSV file to read.

    Returns:
        The non-empty entries of the file's 'value' column as strings,
        sorted case-insensitively.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if the file has no 'value' column.
    '''
    # Read everything as literal text. With pandas' default parsing, values
    # such as "NA", "None" or "null" become NaN (and were then shown as 'nan'),
    # and a single missing value turns a column of years into floats ('1900.0').
    df = pd.read_csv(
        Path('facets', f'{facet}.csv'),
        dtype=str,
        keep_default_na=False,
    )
    # Blank cells are not usable as filter values, so leave them out.
    items = [value for value in df['value'].to_list() if value]
    return sorted(items, key=str.casefold)

# Field names offered both in the "facets" multi-select and as the
# "Field to filter" choices of each filter row.
facets = [
    'category', 'creator', 'placename',
    'year', 'decade', 'century',
    'language', 'content_partner', 'rights',
    'collection', 'usage', 'copyright',
    'dc_type', 'format', 'subject',
    'primary_collection',
]

# Pre-load the values of selected facets so they can be offered in dropdown selects.
# These facets all have fewer than 1000 unique values.
facet_values = {
    name: load_facet(name)
    for name in (
        'category',
        'usage',
        'content_partner',
        'century',
        'language',
        'copyright',
        'primary_collection',
        'year',
        'creator',
    )
}

# API QUERY FUNCTIONS

def get_results(params):
    '''
    Retrieve and display API request results.

    Sends ``params`` to ``API_URL`` and writes the request parameters, the
    request url and the response into ``results_out``. Failures are reported
    in ``results_out`` as well: this function runs from a button callback,
    where an uncaught exception would leave the user with no feedback.

    Parameters:
        params: dict of query parameters for the API request.
    '''
    results_out.clear_output()
    try:
        # A timeout stops an unresponsive server from hanging the form.
        response = requests.get(API_URL, params=params, timeout=60)
    except requests.RequestException as err:
        with results_out:
            display(Markdown('### API request failed'))
            display(display_source(str(err), 'text'))
        return
    query_url = response.url
    try:
        data = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page); show it verbatim below.
        data = None
    with results_out:
        display(Markdown('### API request parameters\n\n'))
        display(display_source(pprint.pformat(params, indent=4), 'python'))
        display(Markdown('### API request url'))
        display(HTML(f'<a href="{query_url}">{query_url}</a>'))
        display(Markdown('### API response'))
        if data is None:
            display(Markdown(f'The response was not valid JSON (HTTP status {response.status_code}).'))
            display(display_source(response.text, 'text'))
        else:
            display(display_source(json.dumps(data, indent=2)))

def create_query(widget, event, data):
    '''
    Build the API request parameters from the form values and run the search.

    Registered as the click handler of the Search button; ``widget``, ``event``
    and ``data`` are supplied by the event system and are not used here.
    Optional parameters are only included when the matching form field has a
    value. The finished parameter dict is passed to ``get_results``.
    '''
    params = {
        'api_key': api_key.v_model,
    }
    if keywords.v_model:
        params['text'] = keywords.v_model
    if geo_bbox.v_model:
        params['geo_bbox'] = geo_bbox.v_model
    for f in filters:
        if f['value'].v_model and f['field'].v_model:
            key = f'{f["type"].v_model}[{f["field"].v_model}][]'
            # Collect values in a list so that several filters on the same
            # type and field are all sent (as repeated parameters) rather
            # than the last one overwriting the others.
            params.setdefault(key, []).append(f['value'].v_model)
    if select_facets.v_model:
        params['facets'] = ','.join(select_facets.v_model)
        params['facets_per_page'] = facets_per_page.v_model
        # The parameter is named 'facets_page', matching the label of the form field.
        params['facets_page'] = facets_page_number.v_model
    if sort_select.v_model:
        params['sort'] = sort_select.v_model
        # A direction is only meaningful when a sort field has been chosen.
        if sort_direction.v_model:
            params['direction'] = sort_direction.v_model
    params['per_page'] = per_page.v_model
    params['page'] = page_number.v_model
    get_results(params)
    
# FORM VARIABLES

# Filter rows currently in the form. Each entry is a dict holding the
# 'type', 'field' and 'value' widgets of one row (appended by create_filter).
filters = []
# Number of blank filter rows that create_filters() builds.
filter_num = 2

# FORM EVENTS

def clear_query(widget, event, data):
    '''
    Reset the search form to its initial state.

    Registered as the click handler of the Clear button; the three arguments
    are supplied by the event system and are not used. Clears the displayed
    results, restores every search field except the API key to its default,
    and replaces the filter rows with a fresh set of blank ones.
    '''
    global filters
    results_out.clear_output()
    # (widget, default) pairs; both ``value`` and ``v_model`` are reset.
    defaults = (
        (keywords, ''),
        (geo_bbox, ''),
        (page_number, 1),
        (per_page, 20),
        (facets_page_number, 1),
        (facets_per_page, 20),
        (sort_select, ''),
        (sort_direction, ''),
    )
    for field, blank in defaults:
        field.value = blank
        field.v_model = blank
    # Handled separately so each attribute receives its own new list.
    select_facets.value = []
    select_facets.v_model = []
    # Discard the existing filter rows and rebuild the blank starting set.
    filters = []
    filters_out.clear_output()
    create_filters()
    display_filters()
    
     
def add_filter(widget, event, data):
    '''
    Append one blank filter row to the form and redraw the filter section.

    Registered as the click handler of the "Add another filter" button; the
    three arguments are supplied by the event system and are not used.
    '''
    filters_out.clear_output(wait=True)
    # The new row's number is its position in the filters list.
    create_filter(len(filters))
    display_filters()
    
# FORM DISPLAY FUNCTIONS

def containerise(element):
    '''
    Wrap a widget, or a list of widgets, in a fluid ``v.Container``.

    Parameters:
        element: a single widget or a list of widgets; anything that is not
            a list is treated as a single child.

    Returns:
        The ``v.Container`` holding the given children.
    '''
    children = element if isinstance(element, list) else [element]
    return v.Container(children=children, class_='mt-0 pt-0', fluid=True)

def select_field(widget, event, data):
    '''
    Swap a filter row's value input to suit the newly selected field.

    Registered as the 'change' handler of each row's field select. ``data`` is
    the selected field name; the row is identified by the number at the end of
    the widget's id.

    - Field with pre-loaded values: a ``v.Select`` when there are fewer than
      30 values, otherwise a ``v.Combobox``.
    - Any other field: a ``v.TextField``, but only if the row currently shows
      a Select or Combobox.
    '''
    row = filters[int(widget.id.split('-')[-1])]
    value_label = 'Value to filter by:'
    if data in facet_values:
        choices = facet_values[data]
        if len(choices) >= 30:
            row['value'] = v.Combobox(items=choices, label=value_label, v_model='', autocomplete=True)
        else:
            row['value'] = v.Select(items=choices, label=value_label, v_model='')
    elif type(row['value']).__name__ in ('Select', 'Combobox'):
        row['value'] = v.TextField(label=value_label, v_model='')
    else:
        # Already a plain text field: nothing to swap, nothing to redraw.
        return
    display_filters()

def create_filter(id):
    '''
    Create the widgets for one filter row and append them to ``filters``.

    Parameters:
        id: row number, embedded in the field select's widget id
            ('field-select-<id>') so that ``select_field`` can find the row.
    '''
    type_picker = v.Select(
        items=['and', 'or', 'without'],
        label='Filter type:',
        class_='mr-4',
        v_model='and',
    )
    field_picker = v.Select(
        items=sorted(facets),
        label='Field to filter:',
        class_='mr-4',
        v_model='',
        id=f'field-select-{id}',
    )
    value_input = v.TextField(label='Value to filter by:', v_model='')
    # Changing the field may swap the value input for a dropdown.
    field_picker.on_event('change', select_field)
    filters.append({
        'type': type_picker,
        'field': field_picker,
        'value': value_input,
    })
    
def display_filters():
    '''
    Redraw the filter section: one row per entry in ``filters`` followed by
    the "Add another filter" button, rendered into ``filters_out``.
    '''
    def as_row(entry):
        # Type and field take a quarter of the width each, the value takes half.
        cells = [
            v.Col(children=[entry['type']], cols=12, sm=3),
            v.Col(children=[entry['field']], cols=12, sm=3),
            v.Col(children=[entry['value']], cols=12, sm=6),
        ]
        return v.Row(children=cells)

    filters_out.clear_output(wait=True)
    with filters_out:
        display(containerise([as_row(entry) for entry in filters] + [add_filter_button]))

def create_filters():
    '''
    Create the initial ``filter_num`` blank filter rows, numbered from 0.
    '''
    for row_number in range(filter_num):
        create_filter(row_number)

# FORM WIDGETS

# One Output widget per section of the form, so that each section can be
# rendered by its own notebook cell.
(
    auth_out,
    text_out,
    filters_out,
    facets_out,
    page_params_out,
    geo_out,
    results_out,
) = (widgets.Output() for _ in range(7))
    
# Free-text inputs. Each label is the name of the request parameter that
# create_query fills from the field.
api_key = v.TextField(
    label='api_key',
    value='',
    v_model='',
    hint='Paste in your DigitalNZ API key',
    persistent_hint=True,
)

keywords = v.TextField(
    label='text',
    value='',
    v_model='',
    hint='Keywords to search for',
    persistent_hint=True,
)

geo_bbox = v.TextField(
    label='geo_bbox',
    v_model='',
    hint='Provide N,W,S,E coordinates to filter by location',
    persistent_hint=True,
)

# Form buttons, each wired to its click handler straight after creation.
# ``color`` accepts 'success', 'info', 'warning', 'danger' or ''.
add_filter_button = v.Btn(
    small=True,
    color='',
    children=[
        v.Icon(left=True, children=['mdi-filter-plus-outline']),
        'Add another filter',
    ],
)
add_filter_button.on_event('click', add_filter)

search_button = v.Btn(children=['Search'], color='primary', class_='mr-4 mt-10')
search_button.on_event('click', create_query)

clear_button = v.Btn(children=['Clear'], color='', class_='mt-10')
clear_button.on_event('click', clear_query)

# Multi-select of the facets to request; create_query joins the chosen names
# with commas into the 'facets' parameter.
select_facets = v.Select(
    label='facets',
    items=facets,
    multiple=True,
    v_model=[],
    hint='Include values for these facets',  # was misspelt "vales"
    persistent_hint=True
)

# Paging controls for the search results; they feed the 'per_page' and
# 'page' request parameters.
per_page = v.Select(
    label='per_page',
    hint='Number of search results per page',
    persistent_hint=True,
    items=[0, 20, 100],
    v_model=20,
)

page_number = v.TextField(
    label='page',
    hint='Page number of search results',
    persistent_hint=True,
    type='number',
    v_model=1,
)

# Both controls side by side, each in its own column.
page_settings = v.Row(
    children=[
        v.Col(children=[control], cols=12, sm=4)
        for control in (per_page, page_number)
    ]
)

# Paging controls for facet values; create_query only sends them when at
# least one facet has been selected.
facets_per_page = v.Select(
    label='facets_per_page',
    hint='Number of facet values per page',
    persistent_hint=True,
    items=[20, 100, 350],
    v_model=20,
)

facets_page_number = v.TextField(
    label='facets_page',
    hint='Page number of facet results',
    persistent_hint=True,
    type='number',
    v_model=1,
)

# Both controls side by side, each in its own column.
facet_page_settings = v.Row(
    children=[
        v.Col(children=[control], cols=12, sm=4)
        for control in (facets_per_page, facets_page_number)
    ]
)

# Sort controls; the empty choice leaves the parameter out of the request.
sort_select = v.Select(
    label='sort',
    hint='Sort results (leave blank to sort by relevance)',
    persistent_hint=True,
    items=['', 'date', 'syndication_date'],
    v_model='',
)

sort_direction = v.Select(
    label='direction',
    hint='Direction to sort',
    persistent_hint=True,
    items=['', 'asc', 'desc'],
    v_model='',
)

# Both controls side by side, each in its own column.
sort_settings = v.Row(
    children=[
        v.Col(children=[control], cols=12, sm=4)
        for control in (sort_select, sort_direction)
    ]
)

# PREPARE FORM ELEMENTS FOR DISPLAY

# Render each section of the form into its own Output widget. Nothing is
# shown at this point; the sections appear where later cells call display()
# on the matching Output widget.
with auth_out:
    display(containerise(api_key))

with text_out:
    display(containerise(keywords))

# Build the initial blank filter rows and draw them into filters_out.
create_filters()
display_filters()

with geo_out:
    display(containerise(geo_bbox))

with page_params_out:
    display(containerise([page_settings, sort_settings]))

# The facet section also carries the Search/Clear buttons and a divider,
# so they sit directly above the results.
with facets_out:
    display(containerise([v.Row(children=[v.Col(children=[select_facets], cols=12, sm=4)]), facet_page_settings]))
    display(containerise([search_button, clear_button]))
    display(v.Divider())


# display(text_out, filters_out, facets_out, results_out)

Authentication

Head over to the developers section of the DigitalNZ site to get yourself an API key.

In [327]:
# Show the API key field that was rendered into auth_out.
display(auth_out)

Search terms

Enter the terms you want to search for. Leave blank to match everything. For more complex keyword searches, including wildcards, proximity modifiers, fuzzy searches, and boolean operators, see the DigitalNZ search help page.

In [317]:
# Show the keyword search field that was rendered into text_out.
display(text_out)

Filters

Limit your results by applying filters to fields that are available as facets.

  • Filter type – how the filters are combined: and, or, without
  • Field to filter – select from the list of fields available as facets
  • Value to filter by – if the field contains fewer than 1000 unique values, you'll be able to select a value from the dropdown list, otherwise enter a text value

You can ask for facets below to find out what values are available for the different fields.

In [318]:
# Show the filter rows; display_filters() redraws this output whenever rows change.
display(filters_out)

Geospatial limits

Supply a geospatial bounding box to limit results to those from places within the box. The box should be in the form: North limit (latitude), West limit (longitude), South limit (latitude), East limit (longitude). For example, for results from Australia:

  • -10.6681857235, 113.338953078, -43.6345972634, 153.569469029
In [319]:
# Show the bounding-box field that was rendered into geo_out.
display(geo_out)

Results parameters

The per_page parameter changes the number of results returned by each API request. The page parameter asks for a particular page of results within the complete results set. By creating a loop that increments the page value, you can access the complete set of results.

By default, results are sorted by relevance in descending order. You can change this by supplying values for sort and direction.

In [320]:
# Show the paging and sort controls that were rendered into page_params_out.
display(page_params_out)

Include facets

Facets give you counts of values in particular fields. You can then use those values as filters (see above), or visualise the facet results to generate an overview of your search.

You can control the number of facet values for each API request using facets_per_page. If there are more than 350 values (the maximum for facets_per_page) you can request additional pages of values using facets_page.

In [321]:
# Show the facet options together with the Search/Clear buttons (facets_out),
# followed by the area that get_results() writes each response into.
display(facets_out)
display(results_out)

Created by Tim Sherratt for the GLAM Workbench.