#!/usr/bin/env python
# coding: utf-8

# # Today's news yesterday
# 
# Version 2 of the Trove API fixed a problem with date searching. At last you can search for articles published on a particular day!
# 
# There's a trick, though. If you want to find articles from 2 November 1942, you have to search for a date range from 1 November to 2 November. This is what the query looks like:
# 
# ```
# date:[1942-11-01T00:00:00Z TO 1942-11-02T00:00:00Z]
# ```
# 
# Once you know that, it's not too hard to do things like find front pages from exactly 100 years ago. This notebook shows you how.
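# 
# To illustrate the trick, here's one way to build that kind of date range for any given day, using Python's standard `datetime` module. This is just a sketch; the notebook itself builds its query with the `arrow` library further down.

# In[ ]:


from datetime import date, timedelta

# The day we want articles from
day = date(1942, 11, 2)

# The range has to start one day earlier
start = day - timedelta(days=1)

# Assemble the query in the format Trove expects
date_query = 'date:[{}T00:00:00Z TO {}T00:00:00Z]'.format(start.isoformat(), day.isoformat())
print(date_query)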
#

# If you haven't used one of these notebooks before, they're basically web pages in which you can write, edit, and run live code. They're meant to encourage experimentation, so don't feel nervous. Just try running a few cells and see what happens!

# ## Get things ready

# In[48]:


import requests
import datetime
import arrow
import random
import re
import os
import shutil
from IPython.display import display, HTML


# ## Set your API key

# In[71]:


# This creates a variable called 'api_key'. Paste your key between the quotes.
# <-- Then click the run icon
api_key = ''

# This displays a message with your key
print('Your API key is: {}'.format(api_key))


# ## Create a date query

# In[26]:


# Get today's date
now = arrow.now('Australia/Canberra')
# Go back in time 100 years
end = now.shift(years=-100)
# Subtract an extra day for the start of the date range
start = end.shift(days=-1)
# Format the query
date_query = 'date:[{}Z TO {}Z]'.format(start.format('YYYY-MM-DDT00:00:00'), end.format('YYYY-MM-DDT00:00:00'))
date_query


# ## Set up API request parameters
# 
# Note that we're adding `firstpageseq:1` to the date query. This limits results to articles on the front page. We can then get the identifier of the front page from the article record.

# In[35]:


# Set up parameters for our API query
# <-- Click the run icon
params = {
    'zone': 'newspaper',
    'reclevel': 'full',
    'encoding': 'json',
    'n': '100',
    'q': '{} firstpageseq:1'.format(date_query),
    'key': api_key
}

api_url = 'http://api.trove.nla.gov.au/v2/result'


# ## Make the API request

# In[56]:


response = requests.get(api_url, params=params)
data = response.json()
articles = data['response']['zone'][0]['records']['article']


# ## Select and download a front page
# 
# Our API request returned a maximum of 100 articles. This function selects one at random, then downloads the front page.

# In[67]:


def get_front_page():
    # Select a random article
    article = random.sample(articles, 1)[0]
    # Get the front page identifier from the page url
    page_id = re.search(r'page\/(\d+)', article['trovePageUrl']).group(1)
    # Construct the url we need to download the image
    page_url = 'http://trove.nla.gov.au/ndp/imageservice/nla.news-page{}/level2'.format(page_id)
    # Make sure there's a 'data' directory to save the image into
    os.makedirs('data', exist_ok=True)
    # Download the page image
    response = requests.get(page_url, stream=True)
    with open('data/frontpage.jpg', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)


# ## Display the front page
# 
# First we use the function defined above to download a randomly-selected front page, and then we display it.
# 
# Re-run this cell for a different page.

# In[70]:


get_front_page()
# The timestamp is just to make the notebook refresh the image
display(HTML('<img src="data/frontpage.jpg?{}">'.format(arrow.now().timestamp())))
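
# ## Checking the API response (optional)
# 
# The cells above assume that the request succeeds and that at least one front-page article comes back. If you want to be a bit more defensive, here's a minimal sketch that wraps the same request in some basic checks. The function name and the error handling are just illustrative suggestions, not part of the original workflow.

# In[ ]:


import requests  # already imported above; repeated so this cell stands alone


def get_front_page_articles(api_url, params):
    # Same request as in the 'Make the API request' cell
    response = requests.get(api_url, params=params)
    # Raise an exception for HTTP errors (for example, a missing or invalid API key)
    response.raise_for_status()
    data = response.json()
    # Follow the same path into the JSON as the cell above
    records = data['response']['zone'][0]['records']
    articles = records.get('article', [])
    if not articles:
        raise ValueError('No front-page articles found for this date range')
    return articles


# Usage (in place of the 'Make the API request' cell):
# articles = get_front_page_articles(api_url, params)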