DFAT Cable Finder

If you ever need to find a file in the National Archives of Australia that contains a specific numbered cable from the Department of Foreign Affairs, this is the tool for you!

Just give it a cable number and it will look in the series listed below for a file that might contain the cable. For each possible match it returns a link to the file as well as a bit of information about it.

This tool works because many of the files in these series include the first and last numbered cable in the file title. So all it does is look at the numbers in each file title to see if the cable you're interested in falls somewhere between them. It's simple, but it's not something you can do in RecordSearch.

It's far from perfect because the way the file titles are constructed is not always consistent, but it's quicker than looking through all the file titles manually.

Series searched:

  • A11785 – Top Secret original and spares inward cables, annual single number series (1948-1972)
  • A11786 – Top Secret original and spares outward cables, single number series (1948-1972)
  • A3195 – Master sheets (used stencils) of inwards cables, annual single number series (1939-1949)
  • A3196 – Master sheets (used stencils) of outwards cables, annual single number series (1939-1949)
  • A6364 – Printed copies of inward cables with I (Inward) prefix filed in binders alphabetically by post (1950-1974)
  • A6366 – Printed copies of outward cables with O (Outward) prefix filed in binders alphabetically by post (1950-1974)

Let me know if you'd like additional series added. If you want to refresh the series data from RecordSearch, just delete the cables_data.json file before running a search. The tool will then reharvest all the data.

In [1]:
from recordsearch_tools.client import RSSearchClient
import math
import re
from tqdm import tqdm_notebook, tnrange
from IPython.display import display, HTML
import ipywidgets as widgets
import json
from copy import deepcopy
In [2]:
# Series identifiers to harvest and search. The series filter compares these
# strings exactly against each record's 'series' value, so they must not
# contain stray whitespace.
series = ['A11785', 'A11786', 'A3195', 'A3196', 'A6364', 'A6366']
In [4]:
def get_total_files(series):
    '''
    Ask RecordSearch how many results a series search returns.
    The 'total_results' value of the search response is returned as an int.
    '''
    client = RSSearchClient()
    response = client.search(sort=5, digitised=False, series=series)
    total = response['total_results']
    return int(total)
    
def get_files(series):
    '''
    Page through the RecordSearch results for a series and return the
    combined list of file records.
    '''
    harvested = []
    client = RSSearchClient()
    file_count = get_total_files(series)
    # The page count assumes 20 results per page (the divisor used here),
    # rounded up so a partial final page is still requested.
    last_page = math.ceil(file_count / 20)
    for page_number in tnrange(1, last_page + 1, desc=series, leave=False):
        page_data = client.search(page=page_number, sort=5, series=series)
        harvested.extend(page_data['results'])
    return harvested

def refresh_data():
    '''
    Reharvest every listed series, write the combined records to
    cables_data.json, and return them.
    '''
    harvested = [record for series_id in series for record in get_files(series_id)]
    with open('cables_data.json', 'w') as data_file:
        json.dump(harvested, data_file)
    return harvested
        
def load_data():
    '''
    Return the file records stored in cables_data.json.
    When that file is missing or isn't valid JSON, run a fresh harvest
    and return its results instead.
    '''
    try:
        with open('cables_data.json', 'r') as data_file:
            return json.load(data_file)
    except (FileNotFoundError, json.JSONDecodeError):
        return refresh_data()

def check_year(r, year):
    '''
    Report whether a year falls within a file's contents date range.

    Parameters:
        r: a file record. r['contents_dates'] is expected to hold
           'start_date' and 'end_date' values whose first four characters
           are the year.
        year: the year to test, as an int or a numeric string.

    Returns:
        True if start year <= year <= end year, otherwise False.
        Records with missing or unparseable dates, and year values that
        aren't whole numbers, give False rather than raising.
    '''
    try:
        wanted = int(year)
        start = int(r['contents_dates']['start_date'][:4])
        end = int(r['contents_dates']['end_date'][:4])
    except (TypeError, KeyError, ValueError):
        # TypeError/KeyError: the date details are missing or None.
        # ValueError: a date (or the requested year) is blank or not numeric.
        # Either way the record can't be matched, so leave it out.
        return False
    return start <= wanted <= end

def _parse_cable_number(cable):
    '''
    Pull the numeric part out of a cable reference such as 'O.1234' or 'I 567'.
    Returns an int, or None if the text contains no digits.
    '''
    match = re.search(r'[OI0]{0,1}\.{0,1}\s*?(\d+)', cable or '')
    return int(match.group(1)) if match else None


def _title_cable_range(title):
    '''
    Extract the first and last cable numbers from a file title.
    Returns a (first, last) tuple of ints, or None when the title doesn't
    yield exactly two candidate numbers.
    '''
    # Start conservatively, looking for O or I in front of numbers
    numbers = re.findall(r'[OI]{1}\.{0,1}\s*?(\d+)', title)
    if not numbers:
        # If that didn't work find all numbers
        numbers = re.findall(r'\d+', title)
    if len(numbers) > 2:
        # If there are too many numbers, exclude ones that look like years
        numbers = [n for n in numbers if not re.search(r'^19[1-9]{1}\d{1}$', n)]
    if len(numbers) != 2:
        return None
    return int(numbers[0]), int(numbers[1])


def find_cable(cable, series=None, year=None):
    '''
    Find files whose titles suggest they contain a given cable, and show
    them in the display_results output widget.

    Parameters:
        cable: the cable reference as text, eg '1234', 'O.1234' or 'I 1234'.
        series: if given, only records whose 'series' equals this value are checked.
        year: if given, only records accepted by check_year() for this year are checked.

    Returns:
        None. Matches are rendered as HTML in display_results. If no number
        can be found in `cable`, 'Not a number' is shown there instead.
    '''
    # Imported here because the name is only needed by this function.
    from html import escape

    def esc(value):
        # Harvested values are plain text; escape them before embedding in markup.
        return escape(str(value))

    display_results.clear_output()
    cable_num = _parse_cable_number(cable)
    if cable_num is None:
        # Stop here: there is nothing to compare the file titles against.
        with display_results:
            print('Not a number')
        return
    # Load pre harvested data
    results = load_data()
    # Filtering builds new lists and never changes the records, so no copy is needed.
    if series:
        results = [r for r in results if r['series'] == series]
    if year:
        results = [r for r in results if check_year(r, year)]
    for result in results:
        cable_range = _title_cable_range(result['title'])
        if cable_range is None:
            continue
        first, last = cable_range
        if not first <= cable_num <= last:
            continue
        # Display the details of each candidate
        details = '<p><b>NAA: <a href="http://www.naa.gov.au/cgi-bin/Search?O=I&Number={}">{}, {}</a></b>'.format(
            esc(result['identifier']), esc(result['series']), esc(result['control_symbol']))
        details += '<br>{}'.format(esc(result['title']))
        details += '<br>{}'.format(esc(result['contents_dates']['date_str']))
        if result['digitised_status'] is True:
            details += '<br>Digitised: {} pages'.format(result['digitised_pages'])
        details += '</p>'
        with display_results:
            display(HTML(details))

def run_query(b):
    '''
    Click handler for the search button: run find_cable() with the current
    values of the cable, series and year widgets. The argument passed in
    by the click event is not used.
    '''
    selected_series = series_select.value
    year_filter = year.value
    find_cable(cable.value, series=selected_series, year=year_filter)
    
    
# All the widgety things
# Offer every series in the dropdown, plus an 'All' entry whose value is None
# so that no series filter is applied. The 'All' entry is added in front of the
# list rather than replacing the first series, which would make it unselectable.
series_options = [('All', None)] + [(s, s) for s in series]
series_select = widgets.Dropdown(options=series_options, description='Series:')
year = widgets.Text(value=None, placeholder='filter by year, eg 1940', description='Year:')
cable = widgets.Text(value=None, placeholder='enter cable number', description='Cable:')
# Search results are rendered into this output area.
display_results = widgets.Output(layout=widgets.Layout(margin='40px 0 0 0'))
button = widgets.Button(description='Find files!', button_style='primary', layout=widgets.Layout(margin='20px 0 0 0'))
button.on_click(run_query)
display(HTML('<h3>Find files containing this numbered cable</h3>'))
display(widgets.VBox([cable, 
                      widgets.HTML('<p><b>Filter by series and/or year to reduce the number of results</b></p>'), 
                      series_select, 
                      year,
                      button,
                      display_results]))

Find files containing this numbered cable

In [ ]: