from pandasdmx import Request
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
estat = Request('ESTAT')
dflow = estat.dataflow()
def dsd_resp(id):
"""
The function takes a dataflow identifier and requests from the Eurostat SDMX api the dataflow's datastructure, datastructure id and datastructure response.
"""
# Defining the data set's dataflow based on the data set id.
df = dflow.dataflow[id]
# Printing the table's name
print("The table's name:")
print(dflow.write().dataflow.loc[id][0])
# Acquiring the dataflow's datastructure id
dsd_id = df.structure.id
# Requesting for the dataflow's datastructure
dsd_resp = estat.get(resource_type = 'datastructure', resource_id = dsd_id)
# Requesting the dataflow's datastructure
dsd = dsd_resp.datastructure[dsd_id]
return dsd_resp, dsd_id, dsd
def dsd_att(dsd):
"""The function takes a table datastructure definition and prints its measure, dimension and attribute lists."""
# Measures
print("Measures:")
print(dsd.measures.aslist())
# Dimensions
print("\nDimensions:")
print(dsd.dimensions.aslist())
# Attributes
print("\nAttributes:")
print(dsd.attributes.aslist())
dflow.write().dataflow[dflow.write().dataflow.index.str.startswith('htec') == True]
name | |
---|---|
dataflow | |
htec_cis3 | Innovation in high-tech sectors in SMEs (CIS3)... |
htec_cis4 | Innovation in high-tech sectors (CIS 2004), EU... |
htec_cis5 | Innovation in high-tech sectors (CIS 2006), EU... |
htec_cis6 | Innovation in high-tech sectors (CIS 2008, CIS... |
htec_eco_ent | Enterprises in high-tech sectors by NACE Rev.1... |
htec_eco_ent2 | Enterprises in high-tech sectors by NACE Rev.2... |
htec_eco_sbs | Economic data in high-tech sectors by NACE Rev... |
htec_eco_sbs2 | Economic data in high-tech sectors by NACE Rev... |
htec_emp_nat | Employment in technology and knowledge-intensi... |
htec_emp_nat2 | Employment in technology and knowledge-intensi... |
htec_emp_nisced | Employment in technology and knowledge-intensi... |
htec_emp_nisced2 | Employment in technology and knowledge-intensi... |
htec_emp_nisco | Employment in technology and knowledge-intensi... |
htec_emp_nisco2 | Employment in technology and knowledge-intensi... |
htec_emp_reg | Employment in technology and knowledge-intensi... |
htec_emp_reg2 | Employment in technology and knowledge-intensi... |
htec_emp_risced | Employment in technology and knowledge-intensi... |
htec_emp_risced2 | Employment in technology and knowledge-intensi... |
htec_emp_risco | Employment in technology and knowledge-intensi... |
htec_emp_risco2 | Employment in technology and knowledge-intensi... |
htec_emp_sbs | Employment statistics on high-tech industries ... |
htec_emp_sbs2 | Employment statistics on high-tech industries ... |
htec_kia_emp | Annual data on employment in knowledge-intensi... |
htec_kia_emp2 | Annual data on employment in knowledge-intensi... |
htec_si_exp4 | High-tech exports - Exports of high technology... |
htec_sti_exp | Business enterprise R&D expenditure in high-te... |
htec_sti_exp2 | Business enterprise R&D expenditure in high-te... |
htec_sti_pers | Business enterprise R&D personnel in high-tech... |
htec_sti_pers2 | Business enterprise R&D personnel in high-tech... |
htec_trd_group4 | High-tech trade by high-tech group of products... |
htec_trd_tot4 | Total high-tech trade in million euro and as a... |
htec_vci_stage1 | Venture capital investment by aggregated stage... |
htec_vci_stage2 | Venture capital investment by detailed stage o... |
id1 = 'htec_sti_exp2'
dflow.write().dataflow.loc[id1][0]
'Business enterprise R&D expenditure in high-tech sectors - NACE Rev. 2'
id2 = 'htec_vci_stage2'
dflow.write().dataflow.loc[id2][0]
'Venture capital investment by detailed stage of development (from 2007, source: EVCA)'
dsd_resp2, dsd_id2, dsd2 = dsd_resp(id2)
The table's name: Venture capital investment by detailed stage of development (from 2007, source: EVCA)
dsd_att(dsd2)
Measures: [PrimaryMeasure | OBS_VALUE] Dimensions: [Dimension | FREQ, Dimension | EXPEND, Dimension | UNIT, Dimension | GEO, TimeDimension | TIME_PERIOD] Attributes: [DataAttribute | OBS_FLAG, DataAttribute | OBS_STATUS]
Codelist:
dsd_resp2.write().codelist.loc[:,:] # [['EXPEND', 'UNIT'],:]
dim_or_attr | name | ||
---|---|---|---|
FREQ | FREQ | D | FREQ |
A | D | Annual | |
D | D | Daily | |
H | D | Half-year | |
M | D | Monthly | |
Q | D | Quarterly | |
S | D | Semi-annual | |
W | D | Weekly | |
EXPEND | EXPEND | D | EXPEND |
INV | D | Investment | |
INV_BUY | D | Investment for buyout | |
INV_GROW | D | Investment in growth stage | |
INV_REPL | D | Investment for replacement | |
INV_RESC | D | Investment for rescue/turnaround | |
INV_VEN | D | Venture capital investment (seed, start-up and... | |
INV_VEN_LATE | D | Investment in later stage | |
INV_VEN_SEED | D | Investment in seed stage | |
INV_VEN_STAR | D | Investment in start-up stage | |
UNIT | UNIT | D | UNIT |
MIO_EUR | D | Million euro | |
NR_COMP | D | Number of companies | |
PC_GDP | D | Percentage of gross domestic product (GDP) | |
GEO | GEO | D | GEO |
AT | D | Austria | |
BE | D | Belgium | |
BG | D | Bulgaria | |
CH | D | Switzerland | |
CZ | D | Czech Republic | |
DE | D | Germany (until 1990 former territory of the FRG) | |
DK | D | Denmark | |
... | ... | ... | |
FI | D | Finland | |
FR | D | France | |
HU | D | Hungary | |
IE | D | Ireland | |
IT | D | Italy | |
LU | D | Luxembourg | |
NL | D | Netherlands | |
NO | D | Norway | |
PL | D | Poland | |
PT | D | Portugal | |
RO | D | Romania | |
SE | D | Sweden | |
UK | D | United Kingdom | |
OBS_FLAG | OBS_FLAG | A | Observation flag. |
b | A | break in time series | |
c | A | confidential | |
d | A | definition differs, see metadata | |
e | A | estimated | |
f | A | forecast | |
i | A | see metadata (phased out) | |
n | A | not significant | |
p | A | provisional | |
r | A | revised | |
s | A | Eurostat estimate (phased out) | |
u | A | low reliability | |
z | A | not applicable | |
OBS_STATUS | OBS_STATUS | A | Observation status. |
- | A | not applicable or real zero or zero by default | |
0 | A | less than half of the unit used | |
na | A | not available |
63 rows × 2 columns
dresp2 = estat.get(resource_type = 'data',
resource_id = id2,
params = {'references': None,
#'startPeriod': '2014',
#'endPeriod': '2014'
})
dat2 = dresp2.data
dat2.dim_at_obs
'TIME_PERIOD'
ser2 = list(dat2.series)
len(ser2)
621
ser2[5].key
SeriesKey(UNIT='MIO_EUR', EXPEND='INV', GEO='DE', FREQ='A')
set(s.key.UNIT for s in dat2.series)
{'MIO_EUR', 'NR_COMP', 'PC_GDP'}
iter2 = (s for s in dat2.series if (s.key.GEO in ['EU15']) == False)
tab2 = dresp2.write(iter2)
tab2 = tab2.stack('GEO')
tab2.columns = tab2.columns.droplevel(['FREQ']) # Dropping 'FREQ' and 'UNIT' levels
tab2.swaplevel('UNIT', 'EXPEND', axis = 1)
tab2 = tab2.loc['2015', ['PC_GDP', 'MIO_EUR']]
# tab2.dropna(axis = 0, how = 'any', inplace = True)
tab2.index = tab2.index.droplevel(0) # Dropping the 'TIME_PERIOD' axis
tab2
UNIT | MIO_EUR | PC_GDP | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
EXPEND | INV | INV_BUY | INV_GROW | INV_REPL | INV_RESC | INV_VEN | INV_VEN_LATE | INV_VEN_SEED | INV_VEN_STAR | INV | INV_BUY | INV_GROW | INV_REPL | INV_RESC | INV_VEN | INV_VEN_LATE | INV_VEN_SEED | INV_VEN_STAR |
GEO | ||||||||||||||||||
AT | 109.0 | 27.0 | 54.0 | 0.0 | 0.0 | 27.0 | 5.0 | 10.0 | 13.0 | 0.032 | NaN | NaN | NaN | NaN | 0.008 | NaN | NaN | NaN |
BE | 722.0 | 562.0 | 68.0 | 18.0 | 11.0 | 63.0 | 30.0 | 0.0 | 33.0 | 0.176 | NaN | NaN | NaN | NaN | 0.015 | NaN | NaN | NaN |
BG | 16.0 | 11.0 | 5.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.036 | NaN | NaN | NaN | NaN | 0.002 | NaN | NaN | NaN |
CH | 640.0 | 197.0 | 107.0 | 27.0 | 26.0 | 283.0 | 143.0 | 5.0 | 134.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
CZ | 13.0 | 0.0 | 9.0 | 0.0 | 0.0 | 4.0 | 1.0 | 1.0 | 1.0 | 0.008 | NaN | NaN | NaN | NaN | 0.002 | NaN | NaN | NaN |
DE | 5996.0 | 4515.0 | 554.0 | 165.0 | 6.0 | 757.0 | 330.0 | 44.0 | 383.0 | 0.198 | NaN | NaN | NaN | NaN | 0.025 | NaN | NaN | NaN |
DK | 1162.0 | 832.0 | 35.0 | 5.0 | 0.0 | 290.0 | 53.0 | 8.0 | 230.0 | 0.437 | NaN | NaN | NaN | NaN | 0.109 | NaN | NaN | NaN |
EL | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | NaN | NaN | NaN | NaN | 0.000 | NaN | NaN | NaN |
ES | 1051.0 | 667.0 | 152.0 | 81.0 | 45.0 | 105.0 | 40.0 | 3.0 | 62.0 | 0.097 | NaN | NaN | NaN | NaN | 0.010 | NaN | NaN | NaN |
FI | 512.0 | 371.0 | 40.0 | 2.0 | 1.0 | 98.0 | 25.0 | 9.0 | 64.0 | 0.247 | NaN | NaN | NaN | NaN | 0.047 | NaN | NaN | NaN |
FR | 9451.0 | 6848.0 | 1743.0 | 112.0 | 3.0 | 744.0 | 364.0 | 11.0 | 370.0 | 0.433 | NaN | NaN | NaN | NaN | 0.034 | NaN | NaN | NaN |
HU | 118.0 | 94.0 | 0.0 | 0.0 | 0.0 | 24.0 | 3.0 | 3.0 | 18.0 | 0.109 | NaN | NaN | NaN | NaN | 0.022 | NaN | NaN | NaN |
IE | 161.0 | 64.0 | 9.0 | 0.0 | 0.0 | 89.0 | 38.0 | 0.0 | 50.0 | 0.075 | NaN | NaN | NaN | NaN | 0.041 | NaN | NaN | NaN |
IT | 1160.0 | 974.0 | 123.0 | 30.0 | 0.0 | 33.0 | 13.0 | 2.0 | 18.0 | 0.071 | NaN | NaN | NaN | NaN | 0.002 | NaN | NaN | NaN |
LU | 74.0 | 32.0 | 0.0 | 0.0 | 0.0 | 41.0 | 7.0 | 0.0 | 34.0 | 0.141 | NaN | NaN | NaN | NaN | 0.079 | NaN | NaN | NaN |
NL | 1708.0 | 1031.0 | 388.0 | 46.0 | 23.0 | 220.0 | 64.0 | 8.0 | 147.0 | 0.252 | NaN | NaN | NaN | NaN | 0.032 | NaN | NaN | NaN |
NO | 1165.0 | 730.0 | 344.0 | 0.0 | 0.0 | 92.0 | 53.0 | 2.0 | 37.0 | 0.333 | NaN | NaN | NaN | NaN | 0.026 | NaN | NaN | NaN |
PL | 802.0 | 653.0 | 115.0 | 5.0 | 0.0 | 29.0 | 15.0 | 3.0 | 12.0 | 0.188 | NaN | NaN | NaN | NaN | 0.007 | NaN | NaN | NaN |
PT | 150.0 | 40.0 | 27.0 | 13.0 | 0.0 | 70.0 | 18.0 | 5.0 | 47.0 | 0.084 | NaN | NaN | NaN | NaN | 0.039 | NaN | NaN | NaN |
RO | 50.0 | 46.0 | 2.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.031 | NaN | NaN | NaN | NaN | 0.001 | NaN | NaN | NaN |
SE | 1722.0 | 1429.0 | 106.0 | 37.0 | 0.0 | 150.0 | 57.0 | 0.0 | 93.0 | 0.388 | NaN | NaN | NaN | NaN | 0.034 | NaN | NaN | NaN |
UK | 20521.0 | 17382.0 | 2157.0 | 67.0 | 82.0 | 833.0 | 512.0 | 10.0 | 310.0 | 0.799 | NaN | NaN | NaN | NaN | 0.032 | NaN | NaN | NaN |
source = ColumnDataSource(data = dict(names = list(tab2.index), invest = tab2.loc[:,('PC_GDP','INV')], venture = tab2.loc[:,('PC_GDP','INV_VEN')]))
p = figure(plot_width = 600, plot_height = 600)
p.square(x = tab2.loc[:,('PC_GDP','INV')], y = tab2.loc[:,('PC_GDP','INV_VEN')], size = list(tab2.loc[:,('MIO_EUR','INV')] / 500), color = "blue", alpha = 0.6)
p.xaxis.axis_label = 'Total R&D Investment'
p.yaxis.axis_label = 'Venture Capital R&D Investment'
labels = LabelSet(x = 'invest',
y = 'venture',
text = 'names',
source = source,
render_mode='canvas',
level='glyph')
p.add_layout(labels)
show(p)