Here's a way you can get a random work from Trove's `book`, `article`, `picture`, `map`, `music`, or `collection` zones. It generates random work id prefixes and performs a wildcard search using the `id` index. If the prefix returns no results, a digit is sliced off the end. If a prefix returns more than 100 results, a digit is added to the end. This continues until the result set hits the sweet spot of at least 1 and no more than 100 results.
This method should also work OK with the `format` facet. However, the further you go down the format hierarchy, the smaller the slices, and therefore the harder it will be to match a work id. But certainly you should be able to get random works with specific top-level formats without any drama – for example, a random thesis from the `book` zone.
This method is probably not going to work for specific collections (i.e. with a NUC id), or in combination with other search queries. Basically, the more you limit the pool of potential resources, the harder it will be to match on random work ids. In that case you might want to try using facets.
import os
import random
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
# Retry policy shared by both adapters: up to 5 retries, a backoff factor
# of 1, triggered when the server answers 502, 503, or 504.
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])

# A single session is reused for every request; the retry policy is applied
# to both URL schemes.
s = requests.Session()
s.mount("http://", HTTPAdapter(max_retries=retries))
s.mount("https://", HTTPAdapter(max_retries=retries))

# Endpoint that every search request in this notebook is sent to.
API_URL = "http://api.trove.nla.gov.au/v2/result"
%%capture
# Load variables from the .env file if it exists
# Use %%capture to suppress messages
%load_ext dotenv
%dotenv
# Trove API key used for every request.
# A non-empty TROVE_API_KEY environment variable takes precedence; otherwise
# replace the placeholder string below with your own key.
API_KEY = os.getenv("TROVE_API_KEY") or "YOUR API KEY"
def get_random_work_from_zone(zone, work_format=None):
    """Return one randomly chosen work from a Trove zone.

    Random work id prefixes are searched with a trailing wildcard
    (``id:<prefix>*``). A prefix that matches nothing is shortened, a prefix
    that matches more than 100 works is lengthened, and the loop ends once a
    search returns between 1 and 100 works. One of the works in that final
    response is then picked at random.

    Note that the loop has no upper bound on the number of requests: if no
    prefix ever produces between 1 and 100 results, it keeps searching.

    Args:
        zone: Name of the zone to search, sent as the ``zone`` parameter.
        work_format: Optional value for the ``l-format`` facet. Ignored
            when falsy.

    Returns:
        One entry from the ``work`` list of the final API response.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (for example because the API key was rejected).
    """
    params = {"zone": zone, "encoding": "json", "n": "100", "key": API_KEY}
    if work_format:
        params["l-format"] = work_format

    # The digits 0-9 in random order; these are the candidates appended to a
    # prefix that matches too many works.
    random_sequence = list(range(0, 10))
    random.shuffle(random_sequence)
    pos = 0

    random_id = None
    total = 0
    records = None
    while total == 0 or total > 100:
        if total == 0 and random_id is None:
            # First pass: start from a random five-digit prefix.
            random_id = str(random.randrange(10000, 100000))
        elif total == 0:
            if len(random_id) >= 4:
                # Nothing matched: widen the search by dropping a digit.
                random_id = random_id[:-1]
            else:
                # A three-character prefix still matched nothing: start over.
                random_id = str(random.randrange(10000, 100000))

        if total > 100 and pos < 10:
            # Too many matches: narrow the search with the next unused digit.
            random_id = f"{random_id}{random_sequence[pos]}"
            pos += 1
        elif pos == 10:
            # Every digit has been tried: start over with a fresh prefix.
            random_id = str(random.randrange(10000, 100000))
            pos = 0

        params["q"] = f"id:{random_id}*"
        response = s.get(API_URL, params=params)
        # Fail with a clear HTTP error rather than an opaque JSON/KeyError
        # when the request was rejected.
        response.raise_for_status()
        records = response.json()["response"]["zone"][0]["records"]
        total = int(records["total"])

    return random.choice(records["work"])
def get_random_work():
    """Return a random work from a randomly selected Trove zone.

    One of the six zone names is chosen at random and passed to
    ``get_random_work_from_zone``; its result is returned unchanged.
    """
    zones = ("book", "article", "picture", "map", "music", "collection")
    return get_random_work_from_zone(random.choice(zones))
# Example: fetch one random work from a randomly chosen zone.
get_random_work()
{'id': '41455576', 'url': '/work/41455576', 'troveUrl': 'https://trove.nla.gov.au/work/41455576', 'title': 'Iutchib chombi', 'issued': 2010, 'type': ['Video'], 'holdingsCount': 0, 'versionCount': 1, 'hasCorrections': 'N', 'relevance': {'score': '6.0', 'value': 'very relevant'}}
`picture` zone

You can specify one of `book`, `article`, `picture`, `map`, `music`, or `collection`. For example:
# Example: fetch one random work from the picture zone.
get_random_work_from_zone("picture")
{'id': '6547302', 'url': '/work/6547302', 'troveUrl': 'https://trove.nla.gov.au/work/6547302', 'title': 'Rockpool life of southern Australia / research and text by Harry Breidahl ; illustration and design by Alexis Beckett', 'contributor': ['Breidahl, Harry'], 'issued': 1990, 'type': ['Poster, chart, other'], 'holdingsCount': 1, 'versionCount': 1, 'hasCorrections': 'N', 'relevance': {'score': '6.0', 'value': 'very relevant'}}
`book` zone

get_random_work_from_zone("book", work_format="Thesis")
{'id': '5465453', 'url': '/work/5465453', 'troveUrl': 'https://trove.nla.gov.au/work/5465453', 'title': 'Studies in colloid and polymer science', 'contributor': ['Chan, Derek Y. C'], 'issued': '1974-2018', 'type': ['Thesis'], 'holdingsCount': 3, 'versionCount': 2, 'hasCorrections': 'N', 'relevance': {'score': '7.0705104', 'value': 'very relevant'}, 'identifier': [{'type': 'url', 'linktype': 'fulltext', 'value': 'http://hdl.handle.net/1885/139931'}, {'type': 'url', 'linktype': 'thumbnail', 'value': 'https://openresearch-repository.anu.edu.au/bitstream/1885/139931/5/b10167766-Chan_D.pdf.jpg'}]}
%%timeit
get_random_work()
430 ms ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Created by Tim Sherratt for the GLAM Workbench.