Notebook

Visualise the connections of a single Australian government agency¶

Select an agency from the dropdown list to view its predecessors and successors as a network graph. Each agency is represented as a node whose position and colour are determined by when the agency was created: its position by the year, and its colour by the decade. The size of the node indicates how long the agency was in existence, while edges between nodes connect agencies to their successors.

In [ ]:

import json
import os

import arrow
import ipywidgets as widgets
import pandas as pd
from IPython.display import IFrame, display
from pyvis.network import Network
from slugify import slugify
from SPARQLWrapper import JSON, SPARQLWrapper

In [ ]:

# Wikidata QID of an example agency
starting_agency = "Q16956162"
# Default number of times the pathway is repeated when building the SPARQL property path,
# ie the maximum number of steps the query takes away from the starting agency
levels = 3
# A single optional step to either a successor (P1366, "replaced by")
# or a predecessor (P1365, "replaces")
pathway = ["(wdt:P1366|wdt:P1365)?"]

In [ ]:

# Client for the Wikidata Query Service SPARQL endpoint, shared by the query functions below
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [ ]:

def get_agencies():
    """
    Get the agencies to list in the dropdown.

    Returns a list of (label, QID) tuples in the order returned by the query,
    where the label combines the agency's English name and its date range.
    """
    agency_query = """
    # This query returns a list of departments and includes the WD ID and a label with the dept name and date range
    SELECT
      ?agency ?label
    WHERE {
      ?agency wdt:P10856 ?naaID;
              wdt:P31/wdt:P279* wd:Q57605562;
              wdt:P571 ?start_date;
              rdfs:label ?agency_label.
      OPTIONAL { ?agency wdt:P576 ?end_date. }
      FILTER (lang(?agency_label) = "en").
      # Combine start and end year into a single string, setting end date to "" if it doesn't exist
      BIND(concat(xsd:string(YEAR(?start_date)), "-", COALESCE(xsd:string(YEAR(?end_date)), "")) as ?date_range)
      # Combine dept name and date range into a single string
      BIND(concat(?agency_label, " (", ?date_range, ")") as ?label)
    } ORDER BY ?label ?start_date
    """
    sparql.setQuery(agency_query)
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()["results"]["bindings"]
    # Flatten each binding into columns named <variable>_<key>, eg agency_value
    agencies = pd.json_normalize(bindings, sep="_")
    # Pull the QID out of the agency value
    agencies["agency_id"] = agencies["agency_value"].str.extract(r"(Q\d+)")
    return list(zip(agencies["label_value"], agencies["agency_id"]))

In [ ]:

# GRAPH CONFIG AND STYLING

# Tableau style colours from http://tableaufriction.blogspot.com/2012/11/finally-you-can-use-tableau-data-colors.html
# Each colour is written as "red.green.blue" so that it can be split into its channels,
# adjusted by make_darker() or make_lighter(), and then joined into an rgb() string.
rgb = [
    "255.187.120",
    "255.127.14",
    "174.199.232",
    "44.160.44",
    "31.119.180",
    "255.152.150",
    "214.39.40",
    "197.176.213",
    "152.223.138",
    "148.103.189",
    "247.182.210",
    "227.119.194",
    "196.156.148",
    "140.86.75",
    "127.127.127",
    "219.219.141",
    "199.199.199",
    "188.189.34",
    "158.218.229",
    "23.190.207",
]


def make_darker(colour, factor=0.75):
    """
    Return a darker version of a colour.

    Each channel value in `colour` (an int or numeric string) is multiplied by
    `factor`, rounded, and returned as a string.
    """
    darker = []
    for channel in colour:
        darker.append(str(round(int(channel) * factor)))
    return darker


def make_lighter(colour, factor=0.75):
    """
    Return a lighter version of a colour.

    Each channel value in `colour` (an int or numeric string) is moved towards 255
    by `factor` of the remaining distance, and returned as a string.
    """
    lighter = []
    for channel in colour:
        value = int(channel)
        lighter.append(str(round((255 - value) * factor) + value))
    return lighter


# Tableau style colours as rgb() strings
colours = ["rgb({})".format(r.replace(".", ",")) for r in rgb]
# Darkened version of each colour, used for borders
borders = ["rgb({})".format(",".join(make_darker(r.split(".")))) for r in rgb]
# Lightened version of each colour, used for highlights
highlights = ["rgb({})".format(",".join(make_lighter(r.split(".")))) for r in rgb]

# Create a group for each decade in the date range, keyed by the first three digits
# of the year ("190" to "202"), and assign a different colour to each group
decades = [str(d) for d in range(190, 203)]
decade_groups = {
    decade: {
        "color": {
            "background": background,
            "border": border,
            "highlight": {"background": highlight, "border": border},
        }
    }
    for decade, background, border, highlight in zip(
        decades, colours, borders, highlights
    )
}


def get_decade_highlight(decade):
    """
    Get the highlight background colour assigned to a decade.

    `decade` is the first three digits of a year, eg "190" for the 1900s.
    Returns None if there's no group for the decade.
    """
    # Look the decade up directly rather than looping through every group
    group = decade_groups.get(decade)
    if group is None:
        return None
    return group["color"]["highlight"]["background"]


# Calculate the possible range of values for the length of an agency's existence
# The longest possible existence is the number of days from 1 January 1901 until now
max_days = (arrow.utcnow() - arrow.get("1901-01-01")).days
# The shortest possible existence is a single day
min_days = 1
# Width of the range, used by calculate_size() to rescale a length of existence to a node size
current_range = max_days - min_days


def calculate_size(start, end, current_range=current_range, biggest=150, smallest=30):
    """
    Calculate the size of a node based on an agency's length of existence.
    The number of days is rescaled to fall within the desired range of sizes,
    so that an existence of 1 day gives `smallest` and an existence of
    `current_range` + 1 days gives `biggest`.
    See: https://stackoverflow.com/a/929107

    `start` and `end` are dates in a form that `arrow.get` can parse. If `end`
    can't be parsed (eg because the agency has no end date), the current date
    is used instead.
    """
    start_date = arrow.get(start)
    try:
        end_date = arrow.get(end)
    except (ValueError, TypeError):
        # No usable end date, so measure the agency's existence up until now
        end_date = arrow.utcnow()
    days = (end_date - start_date).days
    # Linear rescale: ((value - old_min) * new_range / old_range) + new_min
    # The old minimum is 1 day, and the new minimum has to be `smallest` so that
    # the sizes actually run from `smallest` to `biggest`.
    return (((days - 1) * (biggest - smallest)) / current_range) + smallest


# Display options for the graph, converted to JSON and passed to the network by make_graph()
graph_options = {
    "configure": {"enabled": False},
    # Arrange the nodes in levels (make_graph sets each node's level to its year of creation)
    "layout": {
        "hierarchical": {
            "enabled": True,
            "sortMethod": "directed",
            "shakeTowards": "leaves",
            "nodeSpacing": 20,
            "levelSeparation": 20,
            "treeSpacing": 20,
        }
    },
    "physics": {"hierarchicalRepulsion": {"avoidOverlap": 1, "nodeDistance": 100}},
    "nodes": {"font": {"size": 20}},
    # Colours for each decade; make_graph assigns every node to one of these groups
    "groups": decade_groups,
    "edges": {
        "arrows": {
            "to": {"enabled": True, "scaleFactor": 0.5},
        },
        # arrowStrikethrough is an option of the edge itself, not of its arrows,
        # so it has to sit at this level to take effect
        "arrowStrikethrough": False,
        "smooth": {"enabled": True},
        "color": {"color": "#b0bec5", "inherit": True},
    },
}

In [ ]:

def get_sparql_data(starting_agency, pathway=pathway, levels=levels):
    """
    Get the agencies connected to an agency.

    The `pathway` is repeated `levels` times to build a property path that's followed
    from `starting_agency` (a Wikidata QID). Returns a dataframe of the query results,
    with the parts of each variable flattened into columns named <variable>_<key>,
    eg `id_value`.
    """
    query_template = """
    SELECT DISTINCT ?agency ?label ?id ?start_date ?end_date
                    ?after ?afterLabel ?after_id
      WHERE {{
            wd:{} ({}) ?agency.
            ?agency wdt:P10856 ?id;
            wdt:P571 ?start_date;
            rdfs:label ?agency_label.
      OPTIONAL {{ ?agency wdt:P576 ?end_date. }}
      OPTIONAL {{ ?agency wdt:P1366 ?after.
                 ?after wdt:P10856 ?after_id. }}
      FILTER (lang(?agency_label) = "en").
      # Combine start and end year into a single string, setting end date to "" if it doesn't exist
      BIND(concat(xsd:string(YEAR(?start_date)), "-", COALESCE(xsd:string(YEAR(?end_date)), "")) as ?date_range)
      # Combine dept name and date range into a single string
      BIND(concat(?agency_label, " (", ?date_range, ")") as ?label)
    }}
    """
    # Chain one copy of the pathway for each level
    property_path = " / ".join(pathway * levels)
    sparql.setQuery(query_template.format(starting_agency, property_path))
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()["results"]["bindings"]
    return pd.json_normalize(bindings, sep="_")

In [ ]:

def make_graph(df, starting_agency, graph_options=graph_options):
    """
    Build a network graph from a dataframe of agencies and display it.

    Each row of `df` becomes a node (if it hasn't already been added), grouped by
    decade of creation, levelled by year of creation, and sized by length of
    existence. The node for `starting_agency` (a Wikidata QID) is given a thicker
    border. Rows with a successor add an edge from the agency to the successor.

    The graph is saved as an HTML file named after the starting agency and displayed
    in an IFrame inside the `out` output widget, which must exist before this
    function is called.
    """
    net = Network(notebook=True, layout=True, cdn_resources="remote")
    for agency in df.itertuples():
        decade = agency.start_date_value[:3]
        # Compare the whole QID at the end of the entity url -- a substring test
        # would also match longer ids that contain it, eg "Q123" in "Q1234"
        if agency.agency_value.rsplit("/", 1)[-1] == starting_agency:
            # NOTE(review): pyvis may ignore `color` when `group` is also supplied --
            # confirm that this highlight colour is actually applied
            color = get_decade_highlight(decade)
            border = 4
            border_selected = 4
        else:
            color = ""
            border = 1
            border_selected = 2
        # The end date column is missing altogether if none of the agencies
        # has an end date; calculate_size treats None as "still exists"
        end_date = getattr(agency, "end_date_value", None)
        net.add_node(
            agency.id_value,
            label=agency.id_value,
            title=agency.label_value,
            group=decade,
            level=int(agency.start_date_value[:4]),
            size=calculate_size(agency.start_date_value, end_date),
            color=color,
            borderWidth=border,
            borderWidthSelected=border_selected,
        )
    # The successor column is missing altogether if none of the agencies has a successor
    if "after_id_value" in df.columns:
        for agency in df.dropna(subset=["after_id_value"]).itertuples():
            try:
                net.add_edge(agency.id_value, agency.after_id_value)
            except AssertionError:
                # Raised when one end of the edge isn't a node in the graph, eg the
                # successor wasn't returned by the query -- leave these edges out
                pass
    net.set_options(f"var options = {json.dumps(graph_options)}")
    html_file = f"single-agency-{slugify(starting_agency)}.html"
    with out:
        net.write_html(html_file, notebook=True)
        display(
            IFrame(
                html_file,
                height=800,
                width="100%",
            )
        )

Choose how many levels to include

In [ ]:

# Dropdown for choosing the number of levels (2 to 5), starting at 3
select_levels = widgets.Dropdown(options=list(range(2, 6)), value=3)
display(select_levels)

Select a department

In [ ]:

def display_agency_graph(agency):
    """
    Draw the graph of the agency selected in the dropdown.

    `agency` is the change passed to the handler when the dropdown's value changes;
    its `new` attribute holds the newly selected value. The output is always
    cleared, and nothing more is drawn if the new value is empty.
    """
    out.clear_output()
    selected = agency.new
    if not selected:
        return
    df = get_sparql_data(selected, levels=select_levels.value)
    make_graph(df, selected)


# List of agencies, with a placeholder that has an empty value at the top
options = [("-- Select a department --", ""), *get_agencies()]

# Graphs are displayed inside this output widget so they can be cleared on each change
out = widgets.Output()
select_agency = widgets.Dropdown(options=options)
# Draw a new graph whenever the selected value changes
select_agency.observe(display_agency_graph, names="value")

display(select_agency, out)

In [ ]:

%%capture
# Load environment variables if available
%load_ext dotenv
%dotenv

In [ ]:

# TESTING
# In the dev environment, select an agency so that the change handler runs and a graph is generated
if os.environ.get("GW_STATUS") == "dev":
    select_agency.value = "Q16959680"

Created by Tim Sherratt for the GLAM Workbench.

The development of the Wikidata section of the GLAM Workbench was supported by Wikimedia Australia.