How many of the functions are actually used?

In this notebook we'll import data about functions that we've harvested earlier and search for each of these functions in RecordSearch to see how many are actually used.

In [13]:
import json
import pandas as pd
from tqdm import tqdm_notebook
import altair as alt
from recordsearch_tools.client import RSAgencySearchClient

alt.renderers.enable('notebook')
Out[13]:
RendererRegistry.enable('notebook')

Load and prepare the data

In [14]:
# Load the JSON file we've already harvested
with open('data/functions.json', 'r') as json_file:
    functions = json.load(json_file)
In [15]:
def get_children(function):
    f_list = []
    if 'narrower' in function:
        for subf in function['narrower']:
            f_list.append(subf['term'])
            f_list += get_children(subf)
    return f_list

functions_list = []
for function in functions:
    functions_list.append(function['term'])
    functions_list += get_children(function)
In [16]:
# Get rid of duplicates
functions_list = set(functions_list)
# Sort terms
sorted(functions_list)
Out[16]:
['accommodation services',
 'acquisition',
 'administrative decision appeal',
 'administrative decision review',
 'administrative law',
 'administrative services',
 'advertising standards',
 'aged persons services',
 'agricultural sciences',
 'agriculture',
 'air force',
 'air force administration',
 'air force commands',
 'air operations',
 'air safety',
 'air transport',
 'air transport safety',
 'aircraft standards',
 'airport services',
 'airports',
 'ambulance services',
 'analytical services',
 'animal and veterinary sciences',
 'applications for native title',
 'applied sciences',
 'arbitration',
 'archives administration',
 'army',
 'army administration',
 'army commands',
 'artifact export regulation',
 'arts',
 'arts development',
 'arts funding',
 'arts incentive schemes',
 'arts promotion',
 'associations and corporate law',
 'atmospheric sciences',
 'audit',
 'australian capital territory',
 'australian defence forces (adf)',
 'banking',
 'bankruptcy',
 'biological sciences',
 'botany',
 'bounties',
 'broadcasting',
 'broadcasting standards',
 'building',
 'built environment',
 'cabinet',
 'call centre administration',
 'carriage service providers',
 'carrier licensing',
 'censorship',
 'censorship standards',
 'census collection',
 'ceremonial functions',
 'chemical and pesticide regulation',
 'child welfare',
 'citizenship',
 'civic infrastructure',
 'civic management',
 'civil engineering',
 'civil law',
 'climate information services',
 'coastal surveillance',
 'collection access',
 'collection accessioning',
 'collection acquisition',
 'collection management',
 'collection promotion',
 'collection storage',
 'colonial administration',
 'commissions of inquiry',
 'committees of inquiry',
 'commonwealth state relations',
 'communications',
 'community health services',
 'community policing',
 'community protection',
 'community services',
 'community support',
 'community transport',
 'compensation schemes',
 'conservation',
 'conservation programs',
 'construction',
 'consular services',
 'consumer affairs',
 'copyright',
 'copyright regulation',
 'coronial law',
 'corporate affairs',
 'corrective services',
 'counselling services',
 'counterfeiting',
 'courier services',
 'court reporting',
 'courts and tribunals',
 'courts martial',
 'criminal law',
 'criminology',
 'crown land administration',
 'cultural affairs',
 'cultural awards and scholarships',
 'cultural festivals',
 'cultural gifts programs',
 'currency',
 'curriculum development',
 'customs',
 'customs regulations',
 'declaration of interests',
 'defence',
 'defence administration',
 'defence coordination',
 'defence estate management',
 'defence force careers',
 'defence forces',
 'defence forces assistance',
 'defence industries',
 'defence intelligence',
 'defence research',
 'defence service home schemes',
 'dental services',
 'deportation',
 'detention programs',
 'development assistance programs',
 'diplomatic missions',
 'disability services',
 'disaster recovery',
 'disaster relief',
 'driving licenses administration',
 'early childhood education',
 'earth sciences',
 'education',
 'education and training',
 'election campaigning',
 'electoral boundary assessment',
 'electoral matters',
 'electronic commerce',
 'electronic postal services',
 'emergency funding',
 'emergency management',
 'emergency services',
 'employment',
 'employment services',
 'energy',
 'energy resources',
 'environment',
 'environmental impact assessment',
 'environmental monitoring',
 'equipment licensing',
 'equity programs',
 'ethical compliance',
 'exchange rates',
 'excise',
 'export regulation',
 'exports and imports',
 'expositions',
 'external security',
 'extraditions',
 'family law',
 'federal law',
 'field force (army)',
 'film production',
 'finance management',
 'financial assistance',
 'financial matters',
 'firefighting services',
 'fiscal policy',
 'fisheries regulation',
 'fleet',
 'flight regulation',
 'foreign investment control',
 'foreign policy',
 'forensic analysis',
 'forestry regulation',
 'freight',
 'freight movement regulation',
 'genetics',
 'goods and services',
 'governance',
 'government accommodation and catering',
 'government media',
 'government representation overseas',
 'governor general',
 'grants administration',
 'health',
 'health care',
 'health insurance',
 'health services',
 'hearing services',
 'historic memorials',
 'historic relic protection',
 'home savings schemes',
 'horticulture',
 'hospitals and clinics',
 'house of representatives committees',
 'housing',
 'human rights',
 'human rights obligations',
 'hydrology',
 'immigration',
 'import regulation',
 'income assessment',
 'indigenous affairs',
 'indigenous cultural heritage',
 'indigenous enterprises',
 'indigenous heritage conservation',
 'indigenous land rights',
 'indigenous settlements',
 'industrial relations',
 'industries',
 'information management standards',
 'information security',
 'inspection services',
 'insurance',
 'intelligence',
 'intelligence liaison',
 'intelligence support',
 'internal security',
 'international affairs',
 'international liaison',
 'international relations',
 'international security liaison',
 'international trade agreements',
 'international treaty participation',
 'internees',
 'interpreter services',
 'interstate trade agreements',
 'investigation',
 'justice administration',
 'juvenile justice',
 'labour market programs',
 'land transport',
 'land use',
 'land use planning',
 'land use zoning',
 'land valuation',
 'language services',
 'law enforcement',
 'leasing',
 'legal',
 'legal aid',
 'legal aid services',
 'legal services',
 'legislation',
 'lighthouses',
 'literature funding',
 'litigation processes',
 'loans',
 'local laws and ordinances',
 'logistics',
 'logistics (air force)',
 'logistics (army)',
 'logistics (defence)',
 'maintenance',
 'marine and rural regulation',
 'marine and rural support',
 'marine life protection programs',
 'marine science',
 'maritime commands (navy)',
 'maritime services',
 'market regulation',
 'marketing',
 'mathematical sciences',
 'media ownership regulation',
 'mediation programs',
 'medical aids regulation',
 'medical and health sciences',
 'medical research',
 'medical research funding',
 'memorials',
 'metals',
 'meteorology',
 'migrant accommodation services',
 'migrant services',
 'migrant settlements programs',
 'migration',
 'military operations',
 'mineral exploration',
 'mineral resources',
 'mining',
 'mobile telephone services',
 'multicultural heritage promotion',
 'multiculturalism',
 'munitions',
 'national events',
 'national fitness',
 'national heritage',
 'national land use',
 'national parks',
 'national referral laboratory services',
 'national service',
 'native title claims',
 'natural disasters',
 'natural heritage protection',
 'naturalisation assessment',
 'navigation',
 'navy',
 'navy administration',
 'navy commands',
 'navy support',
 'nursing services',
 'occupational health and safety',
 'oceanography',
 'oceans governance',
 'ombudsman',
 'ordnance',
 'overseas aid programs',
 'overseas promotion',
 'overseas student scholarship programs',
 'parks',
 'parliamentary chamber administration',
 'parliamentary committees',
 'parliamentary legislation',
 'parliamentary matters',
 'passenger entry control',
 'passenger services',
 'passport services',
 'passports',
 'pastoral',
 'patent registration',
 'patents and trademarks',
 'pathology',
 'peacekeeping forces',
 'pensions and benefits',
 'personnel',
 'pharmaceuticals and medical aids',
 'physical sciences',
 'planning',
 'police administration',
 'police station',
 'pollutant prevention programs',
 'pollution emission control',
 'population-based research',
 'port authorities',
 'port regulation',
 'postal services',
 'preschool education',
 'presentation arrangements',
 'preservation services',
 'primary education',
 'primary industries',
 'prisoners of war',
 'privacy guideline monitoring',
 'property management',
 'prosecution services',
 'protective services',
 'public borrowing',
 'public service',
 'public utilities',
 'publishing',
 'publishing and printing',
 'quarantine',
 'radio broadcasting',
 'radio communication',
 'rail harmonisation standards',
 'rail land acquisition regulation',
 'rail transport',
 'rail transport safety',
 'railway maintenance',
 'rationing and price control',
 'recordkeeping standards',
 'records of the government',
 'recreation',
 'recruitment',
 'refugee services',
 'refugees',
 'regional development',
 'rehabilitation',
 'removals',
 'repatriation',
 'repatriation hospitals',
 'rescue coordination',
 'research',
 'research and development',
 'resources',
 'retail postal services',
 'retirement income',
 'revenue raising',
 'road safety',
 'road surface maintenance',
 'road traffic regulation',
 'road transport',
 'road transport safety',
 'royal commissions',
 'rural community development',
 'rural field day promotion',
 'rural partnership programs',
 'satellite communication',
 'science',
 'scientific research',
 'sea safety',
 'sea transport',
 'seat of government',
 'secondary education',
 'secondary industries',
 'security',
 'security and intelligence',
 'seismography',
 'settlement negotiations',
 'shipbuilding',
 'social and economic research',
 'social justice and equity',
 'social welfare',
 'space science',
 'spatial information research',
 'sport',
 'standard setting',
 'statistics',
 'storage',
 'strategic development',
 'strategic policy',
 'strategic support',
 'student assistance',
 'superannuation',
 'supreme court law',
 'supreme law',
 'surveillance',
 'surveillance, electronic',
 'survey and mapping',
 'tariff',
 'tariff regulation',
 'tariffs',
 'taxation',
 'taxation compliance',
 'telecommunications',
 'telephone services',
 'television broadcasting',
 'territory administration',
 'tertiary education',
 'tourism',
 'tourism industry development',
 'tourist event promotion',
 'trade',
 'trade development programs',
 'trade expositions',
 'trade practices',
 'trade skills assessment',
 'trade union training',
 'trademark registration',
 'training',
 'training (air force)',
 'training (army)',
 'transport',
 'transport and storage',
 'transport infrastructure development',
 'travel authorisation',
 'travel missions',
 'urban development',
 'urban or regional development',
 'valuation',
 'vehicle registration',
 'vehicle standards',
 "veterans' affairs",
 'visas',
 'viticulture',
 'vocational training schemes',
 'war memorials',
 'wartime security',
 'waste disposal',
 'water conservation plans',
 'water quality monitoring',
 'water resources',
 'water usage management',
 'waterway management',
 'weights and measures',
 'works',
 'world heritage listings',
 'zoology']

Search for agencies associated with each function

In RecordSearch, functions are performed by agencies. So when you search for a function you get back a list of agencies. Here we'll loop through the list of functions and search for associated agencies.

In [17]:
function_totals = []
# Use the agency search code in my recordsearch_tools library
rsclient = RSAgencySearchClient()
for function in tqdm_notebook(functions_list):
    agencies = rsclient.search_agencies(function=function, results_per_page=0)
    # Get the total results from each search (replace None with 0)
    total = 0 if agencies['total_results'] == None else int(agencies['total_results'])
    function_totals.append({'function': function, 'total': total})

Explore the results

In [18]:
# Create a DataFrame with the results
df = pd.DataFrame(function_totals)
In [19]:
df.describe()
Out[19]:
total
count 472.000000
mean 26.777542
std 52.284938
min 0.000000
25% 0.000000
50% 0.000000
75% 28.000000
max 417.000000

So 75% of all functions have less than 28 associated agencies.

How many are actiually used?

In [20]:
# How many functions are actually used
used = df.loc[df['total'] > 0].count()
print(used['total'])
229
In [21]:
percent_used = used['function'] / len(functions_list)
print('{:.1%} of the functions are used'.format(percent_used))
48.5% of the functions are used
In [22]:
# Most used function
df.loc[df['total'] == df['total'].max()]
Out[22]:
function total
363 employment 417
In [23]:
# Top 20 by number of agencies
df.sort_values(by='total', ascending=False)[:20]
Out[23]:
function total
363 employment 417
277 education 292
36 army commands 286
416 social welfare 268
376 indigenous affairs 267
244 training 230
219 housing 220
426 scientific research 212
136 migration 198
290 goods and services 195
67 customs 183
256 government representation overseas 176
40 community services 175
122 secondary industries 173
205 administrative law 169
105 broadcasting 168
228 logistics (army) 166
332 sea transport 160
123 health 159
236 air transport 154

Show how agencies are distributed across functions

In [24]:
# Bin the agencies to make it wasier to read
alt.Chart(df).mark_bar().encode(
    x=alt.X('total:Q', bin=alt.Bin(step=10), title='Number of associated agencies'),
    y=alt.Y('count()', title='Number of functions'),
    tooltip=[alt.Tooltip('total:Q', bin=alt.Bin(step=10), title='Agencies'), alt.Tooltip('count()', title='Functions')]
)
Out[24]:

In [ ]: