In another notebook we harvested a list of species from the Museum of Victoria using their collection API and saved the results as a CSV file.
Here we'll search for specimens matching each of the species and save the total number of records.
We'll use these search parameters:
recordtype
which we'll set to 'specimen'
taxon
which we'll set to the species' taxon name
import requests
from tqdm.auto import tqdm
import pandas as pd
# URL of the collection API's search endpoint; every search request below is sent here.
SEARCH_URL = 'https://collections.museumsvictoria.com.au/api/search'
Load the CSV file containing the list of species.
# Read the previously harvested species list into a dataframe.
df_species = pd.read_csv('museum-victoria-species.csv')
# Preview the first few rows to check the data loaded as expected.
df_species.head()
| | id | taxon_name | common_name |
---|---|---|---|
0 | species/8583 | Melangyna viridiceps | Common Hover Fly |
1 | species/8307 | Tetractenos glaber | Smooth Toadfish |
2 | species/8815 | Salticidae | Jumping Spider |
3 | species/8456 | Hydromys chrysogaster | Common Water Rat |
4 | species/12377 | Dromaius novaehollandiae | Emu |
def get_totals(params, timeout=60):
    '''
    Get the total number of results and pages returned by a search.

    Parameters:
        params: dict of query parameters to send to the search API.
        timeout: seconds to wait for the server before giving up, so that a
            stalled connection can't hang a long-running harvest.

    Returns:
        A (total_results, total_pages) tuple of ints.

    Raises:
        requests.exceptions.HTTPError: if the API responds with an error status.
        requests.exceptions.Timeout: if the API doesn't respond within `timeout`.
        KeyError: if the response lacks the Total-Results or Total-Pages header.
    '''
    response = requests.get(
        SEARCH_URL,
        params=params,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    # Fail with a clear HTTP error rather than a confusing KeyError on a
    # missing header when the request itself was unsuccessful.
    response.raise_for_status()
    # The total results and pages values are in the API response's headers!
    total_results = int(response.headers['Total-Results'])
    total_pages = int(response.headers['Total-Pages'])
    return (total_results, total_pages)
def get_specimen_totals(species):
    '''
    Look up how many specimen records match each species.

    Each record in `species` must have a 'taxon_name' key. A 'total_specimens'
    key holding the number of matching specimens is added to every record
    (the records are updated in place), and the records are returned as a
    list in their original order.
    '''
    base_query = {'recordtype': 'specimen'}
    annotated = []
    for record in tqdm(species):
        # Search for specimens with this species' taxon name.
        query = {**base_query, 'taxon': record['taxon_name']}
        # Only the result count is needed; the page count is ignored.
        record['total_specimens'] = get_totals(query)[0]
        annotated.append(record)
    return annotated
# Convert each row of the species dataframe to a dict and look up its specimen count.
# This sends one API request per species.
specimens = get_specimen_totals(df_species.to_dict('records'))
# Collect the annotated records back into a dataframe for sorting and display.
df_specimens = pd.DataFrame(specimens)
Show the top twenty species by number of specimens!
# Sort the dataframe by total_specimens (largest first) then show a slice of the first 20 records
df_specimens.sort_values(by='total_specimens', ascending=False)[:20]
| | id | taxon_name | common_name | total_specimens |
---|---|---|---|---|
211 | species/8463 | Amphipoda | Amphipod | 20655 |
1184 | species/8483 | Leptoceridae | Caddisfly | 16639 |
1072 | species/8494 | Leptoceridae | Caddisfly larva | 16639 |
1103 | species/15127 | Chrysomelidae | Eucalyptus Leaf Beetle | 11534 |
204 | species/8532 | Castiarina | Jewel Beetle | 9626 |
208 | species/8480 | Hydropsychidae | Caddisfly | 8340 |
1079 | species/8492 | Hydropsychidae | Caddisfly larva | 8340 |
459 | species/15892 | Ophiurida | Brittle Star | 8318 |
226 | species/8360 | Litoria ewingii | Brown Tree Frog | 6040 |
1196 | species/8468 | Ostracoda | Seed Shrimp | 5925 |
92 | species/8341 | Crinia signifera | Common Eastern Froglet | 5666 |
1398 | species/15125 | Ichneumonidae | NaN | 5404 |
243 | species/8395 | Eulamprus | Water Skink | 5081 |
213 | species/15891 | Holothuroidea | NaN | 4858 |
101 | species/15886 | Anomura | NaN | 3427 |
28 | species/8365 | Litoria raniformis | Southern Bell Frog | 3029 |
1186 | species/8509 | Planorbidae | Freshwater Snail | 3000 |
1221 | species/8425 | Antechinus agilis | Agile Antechinus | 2966 |
255 | species/8396 | Lampropholis | Garden Skink | 2962 |
615 | species/8619 | Zoantharia | Zoanthid | 2786 |
Created by Tim Sherratt for the GLAM Workbench. Support me by becoming a GitHub sponsor!