Notebook

Statistics coded: Government Finance Statistics¶

This notebook aims at reproducing the figures/graphs the Statistics Explained article on government finance statistics.

Settings required¶

Built-il libraries¶

In [1]:

import requests
import json
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Costum libraries¶

In [2]:

import sys
costum_lib_locations = ['../../src/lib/python']
for location in costum_lib_locations:
    if location not in sys.path:
        sys.path.append(location)
import requestLib
import dataDealer

Variables needed in the notebook¶

Default variables that are used throughout the notebook

noCountry: countries that are not shown in the plots -> type: list
clean_country: countries that are renamed in the plots in a contracted form -> type: dict
host_url: base url for the data extraction -> type: string

In [3]:

noCountry = ['European Union - 27 countries (2007-2013)' ,
             'European Union - 25 countries (2004-2006)',
             'European Union - 15 countries (1995-2004)',
             'Euro area - 11 countries (1999-2000)',
             'Euro area - 12 countries (2001-2006)',
             'Euro area - 13 countries (2007)',
             'Euro area - 15 countries (2008)',
             'Euro area - 16 countries (2009-2010)',
             'Euro area - 18 countries (2014)',
             'Euro area - 17 countries (2011-2013)',
             'Switzerland'
            ]

clean_country = {'Germany': 'Germany',
                 'Euro area - 19' : 'Euro Area (EA-19)', 
                 'European Union - 27 countries (from ': 'EU-27', 
                 'European Union - 28' : 'EU-28'}

host_url = "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/"

Functions needed in the notebook¶

assign_reference: set lines and columns to prepare the dataFrame creation
response_fun: main function that extracts the data from the server and returns the response object whose attributes are the data in dictionary, string and dataframe datatype
prepare_for_plot: return plottable values from the data used in the last two sections

In [4]:

def assign_reference(data, multiplicity='time'):
    '''
    set lines and columns to prepare the dataFrame creation
        
        params: data: dict: dictionary of data
        params: multiciplity: string: multiple key argument
        
        return: {'lines' : lines, 'columns': columns} : dict: dataFramestructure
    '''
    lines = list(list(data['dimension']['geo']['category'].values())[1].values())
    columns= list(list(data['dimension'][multiplicity]['category'].values())[1].values())
    return {'lines' : lines, 'columns': columns}

def prepare_for_plot(instances, total, subtraction=False, where=(False,False)):
    '''
    Prepare data for the bar plot
        
        params: instances: list: list of instances of the component to request 
                                    - look at the last sections form more explainations
        params: total: dict: total compoment 
        params: substraction: bollean: set if there is a substraction in the percentage calculations 
                                            - False by default, can only diffear from the default value if 
                                                "where" is not (False, False)
        params: where: tuple: say for which compoments there is the subtraction 
                                            - (False, False) by default, can only diffear from the default value if 
                                                "subtraction" is set to True
        
        return: output: dict: data ready for the bar component plot
    '''
    if where != (False, False) and not subtraction:
        print('[-] Bad arguments !\n Cannot give the wehe argument of no subtraction is present')
        return False
    elif subtraction and where == (False,False):
        print('[-] Bad arguments !\n Need to tell where the substraction is')
        
    output = {}
    for item in instances:
        countries = item.response.lines
        values = item.response.values
        name = item.name
        if len(values.keys()) > 1:
            tmp = {}
            for item in countries:
                tmp[item] = []
            for key in values:
                for c in values[key]:
                    tmp[c].append(values[key][c])
            temp = {}
            for key in tmp:
                
                if subtraction:
                    if where[0] in list(values.keys()):
                        s = 0
                        for i in range(len(tmp[key])):
                            if i == where[1]:
                                s += -tmp[key][i]
                            else:
                                s += tmp[key][i]
                        if total[key] != 0:
                            temp[key] = s / total[key] * 100
                        else:
                            temp[key] = 0.
                    else:    
                        if total[key] != 0:
                            temp[key] = sum(tmp[key]) / total[key] * 100
                        else:
                            temp[key] = 0.
                    output[name] = temp
                else:    
                    if total[key] != 0:
                        temp[key] = sum(tmp[key]) / total[key] * 100
                    else:
                        temp[key] = 0.
            output[name] = temp
        else:
            temp = {}
            label = list(values.keys())[0]
            for key in values[label]:
                if total[key] != 0:
                    temp[key] = values[label][key] / total[key] * 100
                else:
                    temp[key] = 0.
            output[name] = temp
    return output

def response_fun(client, args={}, remove_list=[], clean_dict={}, void_item=False, multiplicity='time', display_success=False, toZero=True):
    '''
    Extracts the data from the server and returns the response object whose attributes are the data in dictionary, 
        string and dataframe datatype
        
        param: client: requestLib.RequestHandeler_Object: object that performs the GET request tothe eurostat 
                                                                server
        param: args: dict: arguments for the url to request - empty by default
        param: remove_list: list: countries that are not shown in the plots - empty by default
        param: clean_dict: dict: countries that are renamed in the plots in a contracted form - empty by default
        param: void_item: boolean: void labels for not shown countries - False by default
        param: multiciplity: string: multiple key argument - "time" by default
        param: display_success: boolean: print success status - False by default
        
        return: response: requestLib.Response_Object
    '''
    if args != {}:
        response = client.get_request(args, display_success=display_success)
    else:
        response = client.get_request(client.args)
    frame = assign_reference(response.data['dict'], multiplicity=multiplicity)
    country = frame['lines']
    times = frame['columns']
    if 'status' not in list(response.data['dict'].keys()):
        v = list(response.data['dict']['value'].values())
    else:
        if toZero:
            v = dataDealer.check_data_by_status(response.data['dict'], toZero=True)
        else:
            v = dataDealer.check_data_by_status(response.data['dict'], toZero=False)
    response.values, response.lines = dataDealer.json_to_data(lines=frame['lines'], 
                                         columns=frame['columns'], 
                                         values=v, 
                                         remove_list=remove_list, 
                                         void_item=void_item, 
                                         clean_dict=clean_dict,
                                         multiple_key=multiplicity)
    
    response.DF = dataDealer.subjason_to_DataFrame(lines=response.lines, columns=times, subDict=response.values)
    response.x_labels = dataDealer.clean_label(lines=response.lines, remove_list=remove_list)
    
    return response
    

Instances¶

Two main instances are presentin this notebook

client: object that performs the GET request tothe eurostat server -> only instancied once
response: response object returned by the return_fun, containig the whole data -> instanciated at each GET request by the client

In [5]:

client = requestLib.RequestHandeler(host_url)

Procedure¶

For each visualisation we perform the following steps:

Prepare the GET request url arguments

If not prepared before or if the request is not directly related to the one just before, the args_to_dict method of the requestLib module is used.
If only few arguments need to be chaged from the privious request, the update_args method of the client instance is used.

Run the response_fun function with the proper arguments (look at the description in the function definition).
If more calculations are needed, do them. If not, jump to the next point.
Plot the data or print the subdataframe depending on what has to be shown.

The last two sections Main components of government revenu, 2019 and Main components of government expenditures, 2019 need a different approach that is explained under the former section.

Public Balance 2018 and 2019¶

Arguments for the GET request:
- sector = 'S13' - general government
- na_item = 'B9' - Net borrowing or lending
- time = 2018, 2019 - 2018 & 2019
- unit = 'PC_GDP' - percentage of GDP
Arguments to response_fun and plot filters:
- clean_dict = clean_country
- remove_list = noCountry
- void_item = True - void labels for non shown countries

In [6]:

#%matplotlib inline
#args = requestLib.args_to_dict('table= gov_10dd_edpt1', 'na_item=B9', 'precision=1', 'unit=PC_GDP', 'time=2018,2019','sector=S13')
args = requestLib.args_to_dict_fun(table='gov_10dd_edpt1', na_item='B9', unit = 'PC_GDP', time='2018,2019', sector='S13')
r = response_fun(client,  clean_dict=clean_country, remove_list=noCountry,args=args, void_item=True)

fig, ax = plt.subplots()
plt.title("Public balance, 2018 and 2019")
pos = np.arange(len(r.x_labels))
ax.set_xticks(pos)
ax.set_xticklabels(r.x_labels, rotation = 90)
width = 0.35

ax.bar(pos -  width/2., list(r.values['2018'].values()),  width, label='2018')
ax.bar(pos +  width/2., list(r.values['2019'].values()),  width, label='2019')
fig.set_size_inches(12, 6)
plt.legend(loc='upper right', prop={'size':12})
plt.grid()
plt.show()

Public Balance and General Governement Debt, 2016-2019¶

In [7]:

r = response_fun(client,  clean_dict=clean_country, remove_list=noCountry,args=args, void_item=True)
debt = r.DF
debt

Out[7]:

	2018	2019
Austria	0.2	0.7
Belgium	-0.8	-1.9
Bulgaria	2.0	1.9
Cyprus	-3.5	1.5
Czechia	0.9	0.3
Germany	1.8	1.5
Denmark	0.7	3.8
Euro area - 17 countries (2011-2013)	0.0	0.0
Euro area - 18 countries (2014)	0.0	0.0
Euro Area (EA-19)	-0.5	-0.6
Estonia	-0.5	0.1
Greece	1.0	1.5
Spain	-2.5	-2.9
European Union - 25 countries (2004-2006)	0.0	0.0
European Union - 27 countries (2007-2013)	0.0	0.0
EU-27	-0.4	-0.5
EU-28	-0.7	-0.8
Finland	-0.9	-1.0
France	-2.3	-3.0
Croatia	0.2	0.4
Hungary	-2.1	-2.1
Ireland	0.1	0.5
Italy	-2.2	-1.6
Lithuania	0.6	0.3
Luxembourg	3.1	2.4
Latvia	-0.8	-0.6
Malta	2.0	0.5
Netherlands	1.4	1.7
Poland	-0.2	-0.7
Portugal	-0.3	0.1
Romania	-2.9	-4.4
Sweden	0.8	0.5
Slovenia	0.7	0.5
Slovakia	-1.0	-1.4
United Kingdom	-2.2	-2.3

General government debt, 2019 and 2018¶

Arguments for the GET request:
- sector = 'S13' - general government
- na_item = 'GD' - conslidated gross
- time = '2018,2019 - 2018 & 2019
- unit = 'PC_GDP' - percentage of GDP
Arguments to response_fun and plot filters:
- clean_dict = clean_country
- remove_list = noCountry
- void_item = True - void labels for non shown countries

In [8]:

args = requestLib.args_to_dict_fun(table='gov_10dd_edpt1', na_item='GD', unit = 'PC_GDP', time='2018,2019', sector='S13')

r = response_fun(client,  clean_dict=clean_country, remove_list=noCountry, void_item=True, args=args)

pos = np.arange(len(r.x_labels))

fig, ax = plt.subplots()
plt.title("General government debt, 2018 and 2019")
ax.set_xticks(pos)
ax.set_xticklabels(r.x_labels, rotation = 90)
width = 0.35

ax.bar(pos -  width/2., list(r.values['2018'].values()),  width, label='2018')
ax.bar(pos +  width/2., list(r.values['2019'].values()),  width, label='2019')
fig.set_size_inches(12, 6)
plt.legend(loc='upper right', prop={'size':12})
plt.grid()
plt.show()

Government revenu and expenditure 2009 - 2019¶

Development of total expenditure and total revenue 2009-2019 (% of GDP)¶

Arguments for the GET request
1. First request
- sector = 'S13' - general government
- na_item = 'TE' - total expenditure
- time = 2009,2010,2011,2012,2013,2014,2015,2016,2017,1018,2019 - from 2009 to 2019
- unit = 'PC_GDP' - percentage of GDP
1. Second request
- sector = 'S13' - general government
- na_item = 'TR' - total ereveue
- `time = 2009,2010,2011,2012,2013,2014,2015,2016,2017,1018,2019 - from 2009 to 2019
- unit = 'PC_GDP' - percentage of GDP
Arguments to response_fun and filter countries
- clean_dict = clean_country (defined above)
- Only the countries 'EU-27' and Euro Area ('EA-19')

In [9]:

y = {'total expenditure': {'EU-27':[],'Euro Area (EA-19)' :[] }, 
     'total revenue': {'EU-27':[],'Euro Area (EA-19)' :[] }
     }
args = requestLib.args_to_dict_fun(table = 'gov_10a_main',
                                   na_item = 'TE',
                                   unit = 'PC_GDP',
                                   time = '2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019',
                                sector = 'S13')
expenditures = response_fun(client,  clean_dict=clean_country,args=args)
for country in y['total expenditure']:
    for year in expenditures.values:
        y['total expenditure'][country].append(expenditures.values[year][country])

args = client.update_args('na_item=TR')
revenues = response_fun(client,  clean_dict=clean_country, args = args)

for country in y['total revenue']:
    for year in revenues.values:
        y['total revenue'][country].append(revenues.values[year][country])

##### PLOT
plt.figure(figsize=(8,4))
plot_colors = ['C0', 'C1']
plt.title("Development of total expenditure and total revenue 2009-2019 (% of GDP)")
x = list(revenues.values.keys())
for measure in y:
    i = 0
    for country in y[measure]:
        if measure == 'total revenue':
            line_style = '--'
        else:
            line_style = '-'   
        plt.plot(x,y[measure][country],ls=line_style, marker='x', label='%s: %s' % (country, measure), color=plot_colors[i])
        i += 1
plt.grid()
plt.legend(bbox_to_anchor=(0.8, -0.1), prop={'size':12})
plt.show()

Development of total expenditure and total revenue 2009-2019 (Billion EUR)¶

Arguments for the GET request:
1. First request
- sector = 'S13' - general government
- na_item = 'TE' - total expenditure
- time = 2009,2010,2011,2012,2013,2014,2015,2016,2017,1018,2019 - from 2009 to 2019
- unit = 'MIO_EUR '- billion EUR
1. Second request
- sector = S13 - general government
- na_item = TR - total ereveue
- `time = 2009,2010,2011,2012,2013,2014,2015,2016,2017,1018,2019 - from 2009 to 2019
- unit = 'MIO_EUR' - billion EUR
Arguments to response_fun and filter countries
- clean_dict = clean_country (defined above)
- Only the countries 'EU-27' and Euro Area ('EA-19')

In [10]:

y = {'total expenditure': {'EU-27':[],'Euro Area (EA-19)' :[] }, 
     'total revenue': {'EU-27':[],'Euro Area (EA-19)' :[] }
     }

args = requestLib.args_to_dict('table=gov_10a_main', 
                    'na_item=TE', 
                    'precision=1', 
                    'unit=MIO_EUR', 
                    'time= 2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019',
                    'sector=S13')

expenditures = response_fun(client,  clean_dict=clean_country, args = args)

for country in y['total expenditure']:
    for year in expenditures.values:
        y['total expenditure'][country].append(expenditures.values[year][country])
        
args = client.update_args('na_item=TR')
revenues = response_fun(client,  clean_dict=clean_country, args = args)

for country in y['total revenue']:
    for year in revenues.values:
        y['total revenue'][country].append(revenues.values[year][country])

##### PLOT
plt.figure(figsize=(9,5))
plot_colors = ['C0', 'C1']
plt.title("Development of total expenditure and total revenue 2009-2019 (% of GDP)")
x = list(revenues.values.keys())
for measure in y:
    i = 0
    for country in y[measure]:
        if measure == 'total revenue':
            line_style = '--'
        else:
            line_style = '-'   
        plt.plot(x,y[measure][country],ls=line_style, marker='x', label='%s: %s' % (country, measure), color=plot_colors[i])
        i += 1
plt.grid()

plt.legend(bbox_to_anchor=(0.8, -0.1), prop={'size':12})
plt.show()

Governement revenue and expenditure, 2019¶

Arguments for the GET request:
- sector = 'S13' - general goverment
- na_item = 'TE','TR' - total expenditures & total revenues
- time = 2019
Arguments to response_fun and plot filters
- remove_list = noCountry (defined above)
- clean_dict = clean_countries (defined above)
- void_item = True - void labels for not shown countries
- multiciplity = na_item

In [11]:

args = requestLib.args_to_dict('table=gov_10a_main', 
                    'na_item=TE,TR', 
                    'precision=1', 
                    'unit=PC_GDP', 
                    'time= 2019',
                    'sector=S13')

r = response_fun(client,  clean_dict=clean_country, remove_list=noCountry,void_item=False,multiplicity='na_item' ,args = args)

pos = np.arange(len(r.x_labels))
fig, ax = plt.subplots()

plt.title("Government revenue and expenditure")
ax.set_xticks(pos)
ax.set_xticklabels(r.x_labels, rotation = 90)
width = 0.35
a = pos - width/2.

ax.bar(pos - width/2., list(r.values['Total general government expenditure'].values()),  width, label='Total general government expenditure')
ax.bar(pos +  width/2., list(r.values['Total general government revenue'].values()),  width, label='Total general government revenue')
plt.grid()
fig.set_size_inches(12, 6)
plt.legend(bbox_to_anchor=(0.8, -0.35), prop={'size': 16})
plt.show()

Main components of government revenu, 2019 ¶

Values use and calculations:

$\begin{align*} \text{Total expenditure (TR)} {}={} & \ \ \ \ \ \ \text{total taxes} & \text{D2PAY} + \text{D5PAY} + \text{D91PAY}\\ & + \ \text{total social contributions} & \text{D61PAY}\\ & + \ \text{total sales of goods and services} & \text{P11_P12_P131}\\ & + \ \text{other current revenu} & \text{D39PAY} + \text{D4PAY} + \text{D7PAY}\\ & + \ \text{other capital revenue} & \text{D92PAY} + \text{D99PAY}\\ \end{align*}$

Not all countries in the databse present all the components searched. Therefore only all the countries presenting these componets are plotted.

In a first step all the countries are extracted in all the requests, then after a filter, a second sequence of extraction is done with oly the relevent onces.

The filter is implemented by checking the request with the smaller number of countries returned, from which we are able to find then countries that have to be neglected for the plot.

The implementation of the secrion is done using the Component class and the return_response function defined below

Component class

Each component of the total rvenu/expenditure is an instance of the Component class

Arguments to the construction function:
- name: name of the component
- na_item: the correponding na_item/s
- label: the label to show in the plot

In [12]:

class Component:
    def __init__(self,name, na_item, label):
        self.name = name
        self.na_item = na_item
        self.label = label

return_response function

Take as inputs the list of instances, the default remove_list defined in the section above and the client object instanced above. It returns the lenghts of the returns countries and the Compoment instaces updated by the response of the GET request

In [13]:

def return_response(istances, remove_list, client):
    lens = []
    for instance in istances:
        client.update_args('na_item=%s' % (instance.na_item))
        instance.response = response_fun(client, clean_dict=clean_country, remove_list=remove_list, void_item=False, multiplicity='na_item')
        lens.append(len(instance.response.lines))
    return lens, instances

In [14]:

taxes = Component('taxes','D2REC,D5REC,D91REC', 'Taxes')
social_contribution = Component('social contributions','D61REC', 'Net social contributions')
sales = Component('sales', 'P11_P12_P131', 'Market output,output for own final user and payments for non-market ptoduction')
proprety_income = Component('proprety income','D92REC,D99REC', 'Proprety income')
other = Component('other','D39REC,D4REC,D7REC', 'other')

instances = [taxes, social_contribution,sales, proprety_income, other]
args = requestLib.args_to_dict_fun(table='gov_10a_main', unit = 'PC_GDP', time='2019', sector='S13', na_item='')

lens, instances = return_response(instances, noCountry, client)

min_len_countries = [item.response.lines for item in instances if len(item.response.lines) == min(lens)][0]
diffCountry = [item for item in other.response.lines if item not in min_len_countries]
avoidCountry = noCountry+diffCountry
   
lens, instances = return_response(instances, avoidCountry, client)

client.update_args('na_item=TR')
r = response_fun(client, clean_dict=clean_country, remove_list=avoidCountry, void_item=False, multiplicity='na_item')

revenu = r.values['Total general government revenue']

output = prepare_for_plot(instances, revenu)
plot_x_labels = taxes.response.x_labels
fig, ax = plt.subplots()
pos = np.arange(len(plot_x_labels))
plt.title("Main components of government revenu, 2019")
ax.set_xticks(pos)
ax.set_xticklabels(plot_x_labels, rotation = 90)
width = 0.35

b = np.zeros(len(list(output['taxes'].values())))
for i in range(len(instances)):
    a = np.array(list(output[instances[i].name].values()))
    if i == 0:
        ax.bar(pos,a,width, label=instances[i].label)
    else:
        ax.bar(pos,a,width, bottom=b, label=instances[i].label)
    b += a
fig.set_size_inches(9, 4)
plt.legend(bbox_to_anchor=(1.05, -0.35), prop={'size': 12})


plt.show()

Main components of government expenditures, 2019 ¶

Values used and calculations:

$\begin{align*} \text{Total expenditure (TE)} {}={} & \ \ \ \ \ \ \text{intermediate consumption} & \text{P2}\\ & + \ \text{compensation of employees} & \text{D1PAY}\\ & + \ \text{interest} & \text{D41PAY}\\ & + \ \text{social benefits other than social transfers in kind} & \text{D62PAY}\\ & + \ \text{social transfers in kind via market producers} & \text{D632PAY}\\ & + \ \text{subsides} & \text{D3PAY}\\ & + \ \text{other current expenditure} & \text{D29PAY} + \text{(D4PAY}-\text{D41PAY)}+\text{D5PAY} + \text{D7PAY} + \text{D8PAY}\\ & + \ \text{capital expenditure} & \text{P5} + \text{NP} + \text{D92PAY} + \text{D99PAY} \end{align*}$

In [20]:

args = requestLib.args_to_dict_fun(table='gov_10a_main', unit = 'PC_GDP', time='2019', sector='S13', na_item='')

social_tranfers = Component('social tranfers','D632PAY', 'Social tranfers')
compensation_emp = Component('compensation of employees','D1PAY', 'compensation of employees, payable')
intermediate_consuption = Component('intermediate consumption','P2', 'Intermediate consumption')
capital_expenditure = Component('capital expenditure', 'P5,NP,P92PAY,P99PAY', 'Cital expenditures')
social_benefits = Component('social benefits','D62PAY', 'Social benefits other than social tranfers in kind')
other = Component('Other current expenditure','D29PAY,D4PAY,D41PAY,D5PAY,D7PAY,D8PAY', 'Other current expenditure')
subsices = Component('subsides','D3PAY', 'Subsidies, payable')
interest = Component('Other', 'D41PAY', 'Other')

instances = [
    social_benefits,
    compensation_emp,
    intermediate_consuption,
    social_tranfers, 
    capital_expenditure,
    other,
    subsices,
    interest
]

lens, instances = return_response(instances, noCountry, client)

min_len_countries = [item.response.lines for item in instances if len(item.response.lines) == min(lens)][0]
diffCountry = [item for item in other.response.lines if item not in min_len_countries]
avoidCountry = noCountry+diffCountry
   
lens, instances = return_response(instances, avoidCountry, client)

client.update_args('na_item=TE')
r = response_fun(client, clean_dict=clean_country, remove_list=avoidCountry, void_item=False, multiplicity='na_item')

expenditure = r.values['Total general government expenditure']

output = prepare_for_plot(instances, expenditure, subtraction=True ,where=('Property income, payable', 2))

plot_x_labels = instances[0].response.x_labels
fig, ax = plt.subplots()
pos = np.arange(len(plot_x_labels))
plt.title("Main components of government expenditures, 2019")
ax.set_xticks(pos)
ax.set_xticklabels(plot_x_labels, rotation = 90)
width = 0.35

b = np.zeros(len(list(output['subsides'].values())))
for i in range(len(instances)):
    a = np.array(list(output[instances[i].name].values()))
    if i == 0:
        ax.bar(pos,a,width, label=instances[i].label)
    else:
        ax.bar(pos,a,width, bottom=b, label=instances[i].label)
    b += a
    
fig.set_size_inches(12, 4)
plt.legend(bbox_to_anchor=(0.8, -0.40), prop={'size': 12})
plt.show()

Restore import settings¶

In [16]:

# lanch as last commend to restore the previous python import settings
for location in costum_lib_locations:
    try:
        sys.path.remove(location)
    except ValueError:
        print("path already removed")