Download the contents of a digitised file from the National Archives of Australia

Why? RecordSearch lets you download a PDF of a digitised file, but sometimes it's more convenient to work with individual images.

How? Just enter the barcode of the file in the box below and click the button. When all the images have been downloaded they'll be zipped up, and a convenient download link will be displayed.

More? Click the 'Edit App' button at the top of the page to see how this works.

In [5]:
import os
import shutil
import requests
import ipywidgets as widgets
import time
from slugify import slugify
from IPython.display import display, HTML, FileLink
from tqdm import tqdm_notebook
from recordsearch_tools.client import RSItemClient
from recordsearch_tools.utilities import retry
In [6]:
def _download_page(img_url, filename):
    """Stream one page image from ``img_url`` into ``filename``.

    The data is first written to a ``.part`` file next to the target and only
    renamed to ``filename`` once the transfer has finished. An interrupted
    download therefore never leaves a truncated image behind that a later
    run would mistake for a completed one.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    partial = '{}.part'.format(filename)
    response = requests.get(img_url, stream=True, verify=False)
    try:
        response.raise_for_status()
        with open(partial, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        os.replace(partial, filename)
    finally:
        # Release the streamed connection whether or not the copy succeeded.
        response.close()
        # Only present here if the transfer failed before the rename.
        if os.path.exists(partial):
            os.remove(partial)


# NOTE(review): `ConnectionError` below is the Python builtin, while requests
# raises its own `requests.exceptions.ConnectionError`, which is not a
# subclass of the builtin -- confirm which one the retry is meant to catch.
@retry(ConnectionError, tries=20, delay=10, backoff=1)
def get_images(b):
    """Download every page image of the item whose barcode is in the text box.

    Registered as the click handler of the 'Get images' button. The barcode is
    read from the module-level ``barcode`` widget. Images are saved under
    ``data/images/<series>-<control>-[<identifier>]``, pages that are already
    on disk are skipped, and the folder is then zipped and a download link is
    displayed.

    Args:
        b: the object passed in by the button's click callback; not used.
    """
    if not barcode.value:
        print('You need to provide a barcode!')
        return

    client = RSItemClient()
    item = client.get_summary(entity_id=barcode.value)
    if not item['digitised_pages'] > 0:
        print('Sorry, that item has not been digitised...')
        return

    series = slugify(item['series'])
    control = slugify(item['control_symbol'])
    directory = os.path.join('data', 'images', '{}-{}-[{}]'.format(series, control, item['identifier']))
    os.makedirs(directory, exist_ok=True)

    for page in tqdm_notebook(range(1, item['digitised_pages'] + 1)):
        filename = '{}/{}-p{}.jpg'.format(directory, item['identifier'], page)
        if os.path.exists(filename):
            # Already downloaded on a previous attempt; no request is made,
            # so there is nothing to pause for.
            continue
        img_url = 'http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B={}&S={}&T=P'.format(item['identifier'], page)
        _download_page(img_url, filename)
        # Short pause between requests to avoid hammering the server.
        time.sleep(0.5)

    # make_archive appends '.zip' to the base name it is given.
    shutil.make_archive(directory, 'zip', directory)
    link = FileLink('{}.zip'.format(directory))
    display(HTML('Download zipped images:'), link)
In [3]:
# Text box for the item barcode; get_images() reads its current value
# when the button is clicked.
barcode = widgets.Text(description='Barcode:', placeholder='Enter item barcode', disabled=False)
display(barcode)
In [4]:
# Button that starts the download when clicked.
# button_style accepts 'primary', 'success', 'info', 'warning', 'danger' or ''.
button = widgets.Button(
    description='Get images',
    tooltip='Click to harvest images',
    button_style='primary',
    icon='',
    disabled=False,
)
# Run get_images each time the button is clicked.
button.on_click(get_images)
display(button)