from pandasdmx import Request
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
estat = Request('ESTAT')
dflow = estat.dataflow()
def dsd_resp(id):
"""
The function takes a dataflow identifier and requests from the Eurostat SDMX api the dataflow's datastructure, datastructure id and datastructure response.
"""
# Defining the data set's dataflow based on the data set id.
df = dflow.dataflow[id]
# Printing the table's name
print("The table's name:")
print(dflow.write().dataflow.loc[id][0])
# Acquiring the dataflow's datastructure id
dsd_id = df.structure.id
# Requesting for the dataflow's datastructure
dsd_resp = estat.get(resource_type = 'datastructure', resource_id = dsd_id)
# Requesting the dataflow's datastructure
dsd = dsd_resp.datastructure[dsd_id]
return dsd_resp, dsd_id, dsd
def dsd_att(dsd):
"""The function takes a table datastructure definition and prints its measure, dimension and attribute lists."""
# Measures
print("Measures:")
print(dsd.measures.aslist())
# Dimensions
print("\nDimensions:")
print(dsd.dimensions.aslist())
# Attributes
print("\nAttributes:")
print(dsd.attributes.aslist())
dflow.write().dataflow[dflow.write().dataflow.index.str.startswith('htec') == True]
name | |
---|---|
dataflow | |
htec_cis3 | Innovation in high-tech sectors in SMEs (CIS3)... |
htec_cis4 | Innovation in high-tech sectors (CIS 2004), EU... |
htec_cis5 | Innovation in high-tech sectors (CIS 2006), EU... |
htec_cis6 | Innovation in high-tech sectors (CIS 2008, CIS... |
htec_eco_ent | Enterprises in high-tech sectors by NACE Rev.1... |
htec_eco_ent2 | Enterprises in high-tech sectors by NACE Rev.2... |
htec_eco_sbs | Economic data in high-tech sectors by NACE Rev... |
htec_eco_sbs2 | Economic data in high-tech sectors by NACE Rev... |
htec_emp_nat | Employment in technology and knowledge-intensi... |
htec_emp_nat2 | Employment in technology and knowledge-intensi... |
htec_emp_nisced | Employment in technology and knowledge-intensi... |
htec_emp_nisced2 | Employment in technology and knowledge-intensi... |
htec_emp_nisco | Employment in technology and knowledge-intensi... |
htec_emp_nisco2 | Employment in technology and knowledge-intensi... |
htec_emp_reg | Employment in technology and knowledge-intensi... |
htec_emp_reg2 | Employment in technology and knowledge-intensi... |
htec_emp_risced | Employment in technology and knowledge-intensi... |
htec_emp_risced2 | Employment in technology and knowledge-intensi... |
htec_emp_risco | Employment in technology and knowledge-intensi... |
htec_emp_risco2 | Employment in technology and knowledge-intensi... |
htec_emp_sbs | Employment statistics on high-tech industries ... |
htec_emp_sbs2 | Employment statistics on high-tech industries ... |
htec_kia_emp | Annual data on employment in knowledge-intensi... |
htec_kia_emp2 | Annual data on employment in knowledge-intensi... |
htec_si_exp4 | High-tech exports - Exports of high technology... |
htec_sti_exp | Business enterprise R&D expenditure in high-te... |
htec_sti_exp2 | Business enterprise R&D expenditure in high-te... |
htec_sti_pers | Business enterprise R&D personnel in high-tech... |
htec_sti_pers2 | Business enterprise R&D personnel in high-tech... |
htec_trd_group4 | High-tech trade by high-tech group of products... |
htec_trd_tot4 | Total high-tech trade in million euro and as a... |
htec_vci_stage1 | Venture capital investment by aggregated stage... |
htec_vci_stage2 | Venture capital investment by detailed stage o... |
id1 = 'htec_sti_exp2'
dflow.write().dataflow.loc[id1][0]
'Business enterprise R&D expenditure in high-tech sectors - NACE Rev. 2'
id2 = 'htec_vci_stage2'
dflow.write().dataflow.loc[id2][0]
'Venture capital investment by detailed stage of development (from 2007, source: EVCA)'
dsd_resp1, dsd_id1, dsd1 = dsd_resp(id1)
The table's name: Business enterprise R&D expenditure in high-tech sectors - NACE Rev. 2
dsd_att(dsd1)
Measures: [PrimaryMeasure | OBS_VALUE] Dimensions: [Dimension | FREQ, Dimension | UNIT, Dimension | NACE_R2, Dimension | GEO, TimeDimension | TIME_PERIOD] Attributes: [DataAttribute | OBS_FLAG, DataAttribute | OBS_STATUS]
Codelist:
dsd_resp1.write().codelist
dim_or_attr | name | ||
---|---|---|---|
FREQ | FREQ | D | FREQ |
A | D | Annual | |
D | D | Daily | |
H | D | Half-year | |
M | D | Monthly | |
Q | D | Quarterly | |
S | D | Semi-annual | |
W | D | Weekly | |
UNIT | UNIT | D | UNIT |
MIO_EUR | D | Million euro | |
NACE_R2 | NACE_R2 | D | NACE_R2 |
C | D | Manufacturing | |
C_HTC | D | High-technology manufacturing | |
C_HTC_M | D | Medium high-technology manufacturing | |
C_LTC | D | Low-technology manufacturing | |
C_LTC_M | D | Medium low-technology manufacturing | |
G-N | D | Services of the business economy | |
TOTAL | D | Total - all NACE activities | |
GEO | GEO | D | GEO |
AT | D | Austria | |
BE | D | Belgium | |
BG | D | Bulgaria | |
CH | D | Switzerland | |
CN_X_HK | D | China except Hong Kong | |
CY | D | Cyprus | |
CZ | D | Czech Republic | |
DE | D | Germany (until 1990 former territory of the FRG) | |
DK | D | Denmark | |
EE | D | Estonia | |
EL | D | Greece | |
... | ... | ... | |
NL | D | Netherlands | |
NO | D | Norway | |
PL | D | Poland | |
PT | D | Portugal | |
RO | D | Romania | |
RS | D | Serbia | |
RU | D | Russia | |
SE | D | Sweden | |
SI | D | Slovenia | |
SK | D | Slovakia | |
TR | D | Turkey | |
UK | D | United Kingdom | |
US | D | United States | |
OBS_FLAG | OBS_FLAG | A | Observation flag. |
b | A | break in time series | |
c | A | confidential | |
d | A | definition differs, see metadata | |
e | A | estimated | |
f | A | forecast | |
i | A | see metadata (phased out) | |
n | A | not significant | |
p | A | provisional | |
r | A | revised | |
s | A | Eurostat estimate (phased out) | |
u | A | low reliability | |
z | A | not applicable | |
OBS_STATUS | OBS_STATUS | A | Observation status. |
- | A | not applicable or real zero or zero by default | |
0 | A | less than half of the unit used | |
na | A | not available |
76 rows × 2 columns
dresp1 = estat.get(resource_type = 'data',
resource_id = id1,
params = {'references': None,
'startPeriod': '2014',
'endPeriod': '2014'
})
dat1 = dresp1.data
dat1.dim_at_obs
'TIME_PERIOD'
ser1 = list(dat1.series)
len(ser1)
280
ser1[5].key
SeriesKey(UNIT='MIO_EUR', NACE_R2='C', GEO='CY', FREQ='A')
set(s.key.GEO for s in dat1.series)
{'AT', 'BE', 'BG', 'CH', 'CN_X_HK', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES', 'EU28', 'FI', 'FR', 'HR', 'HU', 'IE', 'IS', 'IT', 'JP', 'KR', 'LT', 'LU', 'LV', 'ME', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'RU', 'SE', 'SI', 'SK', 'TR', 'UK', 'US'}
iter1 = (s for s in dat1.series if (s.key.GEO in ['CN_X_HK', 'EU28', 'US']) == False)
tab1 = dresp1.write(iter1)
tab1 = tab1.stack('GEO')
tab1.columns = tab1.columns.droplevel(['FREQ','UNIT']) # Dropping 'FREQ' and 'UNIT' levels
tab1 = tab1.loc['2014']
tab1.index = tab1.index.droplevel(0) # Dropping the 'TIME_PERIOD' axis
tab1.dropna(axis = 0, how = 'any', inplace = True)
tab1
NACE_R2 | C | C_HTC | C_HTC_M | C_LTC | C_LTC_M | G-N | TOTAL |
---|---|---|---|---|---|---|---|
GEO | |||||||
CY | 5.0 | 4.0 | 0.0 | 0.0 | 1.0 | 4.0 | 15.0 |
CZ | 976.0 | 107.0 | 609.0 | 37.0 | 223.0 | 688.0 | 1730.0 |
DE | 49482.0 | 11541.0 | 33187.0 | 1343.0 | 3401.0 | 7061.0 | 56997.0 |
DK | 2814.0 | 1526.0 | 956.0 | 236.0 | 96.0 | 1985.0 | 4865.0 |
ES | 3096.0 | 748.0 | 1519.0 | 426.0 | 403.0 | 3212.0 | 6784.0 |
FI | 3126.0 | 1873.0 | 892.0 | 190.0 | 171.0 | 1133.0 | 4410.0 |
HR | 85.0 | 49.0 | 28.0 | 6.0 | 3.0 | 73.0 | 164.0 |
HU | 526.0 | 232.0 | 201.0 | 44.0 | 49.0 | 435.0 | 1022.0 |
IT | 8736.0 | 1822.0 | 5029.0 | 966.0 | 919.0 | 3163.0 | 12344.0 |
LT | 50.0 | 15.0 | 17.0 | 12.0 | 5.0 | 64.0 | 116.0 |
MT | 10.0 | 5.0 | 3.0 | 1.0 | 1.0 | 23.0 | 33.0 |
NL | 4389.0 | 942.0 | 2455.0 | 463.0 | 529.0 | 2627.0 | 7433.0 |
PL | 821.0 | 104.0 | 399.0 | 145.0 | 173.0 | 891.0 | 1800.0 |
SI | 455.0 | 204.0 | 165.0 | 22.0 | 65.0 | 224.0 | 689.0 |
TR | 1562.0 | 166.0 | 1064.0 | 117.0 | 216.0 | 1404.0 | 3014.0 |
UK | 9656.0 | 1745.0 | 6080.0 | 689.0 | 1143.0 | 14296.0 | 24730.0 |
list(tab1.index)
['CY', 'CZ', 'DE', 'DK', 'ES', 'FI', 'HR', 'HU', 'IT', 'LT', 'MT', 'NL', 'PL', 'SI', 'TR', 'UK']
source = ColumnDataSource(data = dict(names = list(tab1.index),
manuf = tab1.loc[:,'C'],
serv = tab1.loc[:,'G-N']))
p = figure(plot_width = 600, plot_height = 600)
p.scatter(x = 'manuf' , y = 'serv', size = 10, color = "purple", alpha = 0.6, source = source)
p.xaxis.axis_label = 'Manufacturing'
p.yaxis.axis_label = 'Business Services'
labels = LabelSet(x = 'manuf', y = 'serv', text = 'names', source = source, render_mode='canvas', level='glyph')
p.add_layout(labels)
show(p)