#!/usr/bin/env python
# coding: utf-8

# # *Statistics coded:* Comparative price levels of consumer goods and services
#
# Prepared by [**Mattia Girardi**](https://www.linkedin.com/in/mattia-g-2b676b88) ([EMOS](https://ec.europa.eu/eurostat/web/european-statistical-system/emos)).
#
# This notebook aims at reproducing the illustrations in the *Statistics Explained* article on **[comparative price levels of consumer goods and services](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Comparative_price_levels_of_consumer_goods_and_services)**. In particular, the goal of this article is to carry out an analysis of the price levels for consumer goods and services in the **[European Union (EU)](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Union_%28EU%29)**.
#
# Data are based on **[Price Level Indices (PLIs)](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Price_level_index_%28PLI%29)**, which provide a comparison of countries' price levels relative to the EU average and are calculated using **[Purchasing Power Parities](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Purchasing_power_parities_%28PPPs%29)**.
#
# ## Settings
#
# In this work we will use three main packages:
# * [`eurostat`](https://pypi.org/project/eurostat/), for retrieving data,
# * [`pandas`](https://pandas.pydata.org), for manipulating data,
# * [`plotly`](https://plotly.com), for plotting interactive graphs.

# In[ ]:


# Importing the packages that are used across the project
import os
import eurostat
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio


#
# Interactive representations are generated using the `plotly.io.write_html` method in the rest of the notebook.
# These plots will be saved in a dedicated directory:

# In[ ]:


# Sub-path identifying this notebook inside the documentation tree
_DOMAIN_ = "economy/comparative-price-consumer-goods-services"
# Directory that every `pio.write_html` call below writes its figure into
_SAVDIR_ = "../../docs/" + _DOMAIN_


# ## Representing Data
#
# ### Overall Price Levels
#
# In this section, the countries analyzed are the **[27 EU Member States](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Union_%28EU%29)**, United Kingdom, three **[EFTA](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:European_Free_Trade_Association_%28EFTA%29)** countries (Iceland, Norway and Switzerland), five **[candidate](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Candidate_countries)** countries (Albania, Montenegro, North Macedonia, Serbia and Turkey) and one **[potential](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Potential_candidates&redirect=no)** candidate country (Bosnia and Herzegovina), as well as the United States and Japan (to have an extra-EU comparison).
#
# In the following code, data are retrieved using the `Python` package [`eurostat`](https://pypi.org/project/eurostat/) which collects bulk data from the online page (and not from the API).
#
# First of all, we create a variable that stores all the available datasets (in `dataframe` format); this will help us in picking up the required dataset.

# In[ ]:


# Table of contents of the Eurostat catalogue: one row of metadata per dataset
toc_df = eurostat.get_toc_df()


# In `toc_df`, metadata are stored for each dataset listed, not the data. To extract data, a `code` is required, which is contained in the second column of the `toc_df` dataframe.
# So, we select the dataset that we need for this section, which has a title named *"Comparative price levels"*. In order to pick the information about that, we use the function `eurostat.subset_toc_df`, which allows us to search for the info required, filtering for a specific string, *"comparative price"*.
# In[ ]:


# Keep only the catalogue entries whose title mentions the search string
search_string = 'comparative price'
toc_df_subset = eurostat.subset_toc_df(toc_df, search_string)


# Now, we can get the *code* required for extracting data, printing `toc_df_subset`.

# In[ ]:


print(toc_df_subset)


# The required *code* is reported in the second column; therefore, by inserting it in the function `eurostat.get_data_df` we can extract the bulk data:

# In[ ]:


# Download the dataset identified by its code and keep it in a variable
dataset_code = 'tec00120'
df = eurostat.get_data_df(dataset_code)
print(df.head())

# Rows removed by index label before plotting.
# NOTE(review): the labels are hard-coded; which geo codes they correspond to
# depends on the row order of the downloaded dataset -- confirm against `df`.
rows_to_discard = [10, 15, 17, 44]
df1 = df.drop(index=rows_to_discard)
print(df1.head())


# In order to have a clear understanding of the data we will use, we print out the *dictionary* for the data category: data are codified through alpha-numeric format and categorized according to their scope. The *dictionary* explains the meaning of each *category code*.
#
# This procedure will be particularly useful for the sections in which we will deal with multiple categories.

# In[ ]:


# Code -> description dictionary of the `ppp_cat` dimension
dic = eurostat.get_dic('ppp_cat')
print(dic)


# We want to represent the [first image](https://ec.europa.eu/eurostat/statistics-explained/statexpservices/chart/index.php?title=Comparative_price_levels_of_consumer_goods_and_services) in the article.
#
#
# We are going to represent a plot which shows different colours for the countries' categories, i.e. their status with respect to the EU. To do this, we use a `for` loop:
#
# * we create an empty list;
# * we write a for loop that, depending on the text reported in every single row, associates a macro-area country by country;
# * each for loop result is appended to the empty list, so that each row of the main dataframe gets its area, in the same order;
# * we add the list we have made to the dataset as a new column.
# In[ ]:


# Geo codes grouped by their status with respect to the EU.  Any code that is
# in none of these groups is labelled 'EU' by the loop below.
# NOTE(review): CH, IS and NO (EFTA) therefore end up in the 'EU' group --
# confirm that this is the intended colouring.
_extra_eu_codes = {'JP', 'US', 'TR', 'MK', 'RS', 'ME', 'AL', 'BA'}
_former_eu_codes = {'UK'}
_aggregate_codes = {'EA19', 'EU27_2020'}

# Writing the for loop: one area label per row of `df1`, in row order
area = []
for geo in df1['geo\\time']:
    if geo in _extra_eu_codes:
        # The label spelling 'Etxra-EU' (sic) is kept on purpose: the colour
        # assignment of the 2018/2019 comparison matches on this exact string.
        area.append('Etxra-EU')
    elif geo in _former_eu_codes:
        area.append('Former EU')
    elif geo in _aggregate_codes:
        area.append('Aggregate EU')
    else:
        area.append('EU')

# Inserting the list to the dataset
df1['area'] = area

# check the dataset
print(df1.head())


# Now, we can represent the barplot.

# In[ ]:


# Fixed left-to-right order of the bars: aggregates first, then the European
# countries alphabetically, then UK, the (potential) candidates, US and JP
_fig1_order = ['EU27_2020', 'EA19', 'AT', 'BE', 'BG', 'CH', 'CY', 'CZ', 'DE',
               'DK', 'EE', 'EL', 'ES', 'FI', 'FR', 'HR', 'HU', 'IE', 'IS',
               'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO',
               'SE', 'SI', 'SK', 'UK', 'AL', 'BA', 'ME', 'MK', 'RS', 'TR',
               'US', 'JP']

fig1 = px.bar(df1,
              x='geo\\time',
              y=2019,
              labels=dict(area='Area'),
              color="area",
              color_discrete_sequence=["orange", "blue", "crimson", "steelblue"],
              opacity=0.8)
fig1.update_xaxes(tickangle=45)
fig1.update_layout(title='Price Level Index for Final Household Consumption Expenditure 2019',
                   xaxis={'categoryorder': 'array', 'categoryarray': _fig1_order},
                   showlegend=False)
fig1.layout.template = 'plotly_white'
fig1.update_xaxes(title="Countries")
fig1.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig1, file=os.path.join(_SAVDIR_, 'figure1.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure1.html)

# We now create a similar graph that also shows the data for 2018, so that the two years can be compared.
#
# This time we assign a colour to each *area*; in this way, we can make a barplot using pre-defined colours for each *area*.
# In[ ]:


# Comparative price level for 2018 & 2019

# Bar colour of each area; any area not listed here is drawn in blue.
# The key 'Etxra-EU' (sic) matches the label stored in df1['area'].
_colour_by_area = {
    'Etxra-EU': 'orange',
    'Former EU': 'steelblue',
    'Aggregate EU': 'crimson',
}
colors = []
for area_label in df1['area']:
    colors.append(_colour_by_area.get(area_label, 'blue'))

# Work on a copy: a plain `df2 = df1` would only alias the frame, so adding
# the 'color' column would silently modify `df1` as well.
df2 = df1.copy()
df2['color'] = colors

fig2 = go.Figure()
fig2.add_trace(go.Bar(x=df2['geo\\time'],
                      y=df2[2018],
                      name='2018',
                      marker_color=df2['color']))
# The 2019 bars are drawn semi-transparent so the two years can be told apart
fig2.add_trace(go.Bar(x=df2['geo\\time'],
                      y=df2[2019],
                      name='2019',
                      marker_color=df2['color'],
                      opacity=0.6))

fig2.update_xaxes(tickangle=45)
fig2.layout.template = 'plotly_white'
fig2.update_layout(
    # Adjacent literals are concatenated; the explicit trailing space keeps
    # "2018" and the parenthesis apart (a backslash continuation glued them).
    title=('Price Level Index for Final Household Consumption Expenditure for 2019 and 2018 '
           '(the opaque colors are for 2018)'),
    xaxis={'categoryorder': 'array',
           'categoryarray': ['EU27_2020', 'EA19', 'AT', 'BE', 'BG', 'CH', 'CY',
                             'CZ', 'DE', 'DK', 'EE', 'EL', 'ES', 'FI', 'FR',
                             'HR', 'HU', 'IE', 'IS', 'IT', 'LT', 'LU', 'LV',
                             'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'SE', 'SI',
                             'SK', 'UK', 'AL', 'BA', 'ME', 'MK', 'RS', 'TR',
                             'US', 'JP']},
    # Nested title/font form instead of the deprecated `titlefont_size`
    yaxis=dict(
        title=dict(text='Values (€)', font=dict(size=15)),
        tickfont=dict(size=14),
    ),
    showlegend=True
)
fig2.update_xaxes(title="Countries")
fig2.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig2, file=os.path.join(_SAVDIR_, 'figure2.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure2.html)

# ### Figure 1: Price level index sorted
#
# Now, we want to represent the data for [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_%28HFCE%29) in increasing order, through a plot similar to [Figure 1](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_level_index_for_final_household_consumption_expenditure_(HFCE)_2019,_(EU-27%3D100).png).
#
#
# Instead of reporting values at the top of the bar, we will plot an interactive graph.
# Through the following code we define the color for all of the countries, then we assign a different color for `"EA19"`. After that we just have to create a plot, defining the *ascending* order.
# In[ ]:


# Overall price level

# Remove the rows that are not needed for this plot.
# NOTE(review): the index labels are hard-coded; which geo codes they refer to
# depends on the row order of the downloaded dataset -- confirm.
df2 = df1.drop(index=[16, 25, 43])

# Defining the colors: one entry per remaining row, purple everywhere except
# for the euro-area aggregate.  Deriving the list from the data keeps it the
# right length and highlights EA19 wherever it sits in the frame.
colors2 = ['orange' if geo == 'EA19' else 'purple' for geo in df2['geo\\time']]

# Plotting the graph.  Plotly express treats the entries of `color` as
# category labels, so the explicit map is what makes the bars actually
# purple / orange instead of the default palette.
fig3 = px.bar(df2,
              x='geo\\time',
              y=2019,
              labels=dict(countries='Countries'),
              color=colors2,
              color_discrete_map={'purple': 'purple', 'orange': 'orange'},
              opacity=0.8)
fig3.update_xaxes(tickangle=45)
fig3.layout.template = 'plotly_white'
fig3.update_layout(title='Price Level Index for Final Household Consumption Expenditure 2019 (EU-27=100)',
                   # bars sorted by increasing value
                   xaxis={'categoryorder': 'total ascending'},
                   showlegend=False)
fig3.update_xaxes(title="Countries")
fig3.update_yaxes(title="Values")
fig3.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig3, file=os.path.join(_SAVDIR_, 'figure3.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure3.html)

# ### Generic households consumption
#
#
#
#
#
#
#
# We retrieve data using the `eurostat.get_data_df` method, which extracts data by code (_prc_ppp_ind_), and store it in a variable.
# We then select the data referring to the [*Price Level Indices (PLIs)*](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Price_level_index_%28PLI%29) for the 27 EU Member States (updated in 2020), filtering by the `.loc` method. We use the same command to keep only the countries listed at the beginning of the article.
# In[ ]:


# Retrieving data for tables' plots
table_1 = eurostat.get_data_df('prc_ppp_ind')

# filtering data by index: keep the price level indices relative to EU27_2020
is_pli_eu27 = table_1['na_item'] == "PLI_EU27_2020"
tab3_2 = table_1.loc[is_pli_eu27]

# filtering data by countries: the euro-area aggregate plus the 37 countries
# shown in the article's tables
_table_geo_codes = [
    'EA19', 'AT', 'BE', 'BG', 'CH', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES',
    'FI', 'FR', 'HR', 'HU', 'IE', 'IS', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL',
    'NO', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK', 'UK', 'AL', 'BA', 'ME', 'MK',
    'RS', 'TR',
]
tab3_3 = tab3_2.loc[tab3_2['geo\\time'].isin(_table_geo_codes)]


# ### Table 1: Price levels for food, beverages, tobacco, clothing and footwear
#
# We represent [**Table 1**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_level_index_for_food,_beverages,_clothing_and_footwear,_2019,_EU-27%3D100_v2.png).
# To do this, we filter data for the goods'/services' categories (using the codes provided in the above-represented dictionary). The codes are:
# * `"A0101"`, indicating Food and non-alcoholic beverages;
# * `"A0102"`, indicating Alcoholic beverages, tobacco and narcotics;
# * `"A010301"`, indicating Clothing;
# * `"A010302"`, for Footwear category.
#
# Then, looping over the codes, we assign the string that allows interpreting each category code.

# In[ ]:


# Filtering for categories
_table1_codes = ["A0101", "A0102", "A010301", "A010302"]
final1 = tab3_3.loc[tab3_3['ppp_cat'].isin(_table1_codes)].copy()

# Explicating categories' codes: any code without an entry below is labelled
# 'Footwear' (after the filter above that can only be "A010302")
_table1_labels = {
    'A0101': 'Food and non-alcoholic beverages',
    'A0102': 'Alcoholic beverages, tobacco and narcotics',
    'A010301': 'Clothing',
}
category1 = [_table1_labels.get(code, 'Footwear') for code in final1['ppp_cat']]

final1['ppp_cat_expl1'] = category1
print(final1.head())


# After editing the dataset, we adjust it for the five subplots (one for each category previously mentioned and one for **[HFCE](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_%28HFCE%29)**).
#
# First of all, we subset the main dataset by category.
# Secondly, we make the subplot for [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_%28HFCE%29) (filtering for its code, `"E011"`). This subplot will be used across all three graphs in this section. We use the same already described technique for filtering and explicating the category code. Besides, we assign the bar colour to a variable. Furthermore, since we want to highlight the highest and lowest values, we *reset the index* of the dataset, using `.reset_index`.
# We then give the maximum and the minimum value a different colour: once the index has been reset, `.idxmax()` and `.idxmin()` return the row positions of the highest and lowest 2019 values, and those two positions of the colour list are overwritten with the highlight colour.

# In[ ]:


def _extreme_colours(frame, base_colour, value_col=2019, highlight='midnightblue'):
    """Return one bar colour per row of *frame*.

    Every row gets *base_colour*, except the rows holding the largest and the
    smallest value of *value_col*, which get *highlight* (first occurrence in
    case of ties).  Positions are computed on a freshly reset index, so
    *frame* may carry any index.  When the column holds no non-missing value,
    nothing is highlighted.
    """
    colours = [base_colour] * len(frame)
    values = frame[value_col].reset_index(drop=True)
    if values.notna().any():
        # plain colour strings, not one-element lists
        colours[values.idxmax()] = highlight
        colours[values.idxmin()] = highlight
    return colours


# Defining the dataset for each subplot, by filtering the category on the
# frame's own `ppp_cat` column and resetting the index
s1final1 = final1.loc[final1['ppp_cat'] == "A0101"].reset_index(drop=True)
s2final1 = final1.loc[final1['ppp_cat'] == "A0102"].reset_index(drop=True)
s3final1 = final1.loc[final1['ppp_cat'] == "A010301"].reset_index(drop=True)
s4final1 = final1.loc[final1['ppp_cat'] == "A010302"].reset_index(drop=True)

## SUBPLOT for HFCE
# Making a subset; the copy lets us add a column without touching `tab3_3`
final1hcfe = tab3_3.loc[tab3_3['ppp_cat'] == "E011"].copy()
# Explicating the category code for HFCE (every row of the subset is "E011")
final1hcfe['hcfe'] = 'HCFE'
final1hcfe = final1hcfe.reset_index(drop=True)
# Bar colours, with the highest and lowest values highlighted
colorsub_hcfe = _extreme_colours(final1hcfe, 'royalblue')


# We use the same method for the other categories' subplots:

# In[ ]:


## SUBPLOT1
colorsub_s1final1 = _extreme_colours(s1final1, 'slategrey')
## SUBPLOT2
colorsub_s2final1 = _extreme_colours(s2final1, 'dodgerblue')
## SUBPLOT3
colorsub_s3final1 = _extreme_colours(s3final1, 'lightsteelblue')
## SUBPLOT4
colorsub_s4final1 = _extreme_colours(s4final1, 'steelblue')


# Finally, we plot the subplots:

# In[ ]:


# Plotting: one row per panel, HFCE first
fig4 = make_subplots(rows=5, cols=1, x_title='Countries', y_title='Values (€)',
                     shared_xaxes=True, shared_yaxes=True)

_fig4_panels = [
    (final1hcfe, 'HCFE', colorsub_hcfe),
    (s1final1, 'Food', colorsub_s1final1),
    (s2final1, 'Beverages', colorsub_s2final1),
    (s3final1, 'Clothing', colorsub_s3final1),
    (s4final1, 'Footwear', colorsub_s4final1),
]
for _row, (_frame, _label, _colours) in enumerate(_fig4_panels, start=1):
    fig4.add_bar(x=_frame['geo\\time'], y=_frame[2019], name=_label,
                 marker_color=_colours, row=_row, col=1)

fig4.update_xaxes(tickangle=45)
fig4.layout.template = 'plotly_white'
fig4.update_layout(
    # explicit space between the two parts of the title
    title=("Price levels for food, beverages, tobacco, clothing and footwear 2019 "
           "(darker bars represent the highest and lowest values)"),
    showlegend=True
)

fig4.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig4, file=os.path.join(_SAVDIR_, 'figure4.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure4.html)

# ### Table 2: Price levels for energy, furniture, household appliances and consumer electronics
#
# We represent [Table 2](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_level_index_for_energy,_furniture,_household_appliances_and_consumer_electronics,_2019,_EU-27%3D100_v2.png).
# The methodology is the same used in the previous section. There will not be any code referring to [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_(HFCE)) since everything was previously defined.
# Hereby, the codes analysed refer to the consumption of:
# * `"A010405"`, Electricity;
# * `"A010501"`, Furniture;
# * `"A010503"`, Households appliances;
# * `"A050102"`, Electronics.
# In[ ]:


def _extreme_colours(frame, base_colour, value_col=2019, highlight='midnightblue'):
    """Return one bar colour per row of *frame*.

    Every row gets *base_colour*, except the rows holding the largest and the
    smallest value of *value_col*, which get *highlight* (first occurrence in
    case of ties).  Positions are computed on a freshly reset index, so
    *frame* may carry any index.  When the column holds no non-missing value,
    nothing is highlighted.
    """
    colours = [base_colour] * len(frame)
    values = frame[value_col].reset_index(drop=True)
    if values.notna().any():
        # plain colour strings, not one-element lists
        colours[values.idxmax()] = highlight
        colours[values.idxmin()] = highlight
    return colours


# Filtering for categories
final2 = tab3_3.loc[tab3_3['ppp_cat'].isin(["A010405", "A010501", "A010503", "A050102"])].copy()

# Explicating categories' codes: any code without an entry below is labelled
# 'Consumer electronics' (after the filter above that can only be "A050102")
_table2_labels = {
    'A010405': 'Electricity',
    'A010501': 'Furniture',
    'A010503': 'Households appliances',
}
category2 = [_table2_labels.get(code, 'Consumer electronics') for code in final2['ppp_cat']]

final2['ppp_cat_expl'] = category2
print(final2.head())


# In[ ]:


# Defining the dataset for each subplot, by filtering the category on the
# frame's own `ppp_cat` column and resetting the index
s1final2 = final2.loc[final2['ppp_cat'] == "A010405"].reset_index(drop=True)
s2final2 = final2.loc[final2['ppp_cat'] == "A010501"].reset_index(drop=True)
s3final2 = final2.loc[final2['ppp_cat'] == "A010503"].reset_index(drop=True)
s4final2 = final2.loc[final2['ppp_cat'] == "A050102"].reset_index(drop=True)


# In[ ]:


# Bar colours of each subplot, with the rows holding the highest and the
# lowest 2019 value coloured differently
## SUBPLOT1
colorsub_s1final2 = _extreme_colours(s1final2, 'slategrey')
## SUBPLOT2
colorsub_s2final2 = _extreme_colours(s2final2, 'dodgerblue')
## SUBPLOT3
colorsub_s3final2 = _extreme_colours(s3final2, 'lightsteelblue')
## SUBPLOT4
colorsub_s4final2 = _extreme_colours(s4final2, 'steelblue')


# In[ ]:


# Plotting: one row per panel; the HFCE panel reuses `final1hcfe` and
# `colorsub_hcfe` prepared for the previous table
fig5 = make_subplots(rows=5, cols=1, x_title='Countries', y_title='Values (€)',
                     shared_xaxes=True, shared_yaxes=True)

_fig5_panels = [
    (final1hcfe, 'HCFE', colorsub_hcfe),
    (s1final2, 'Electricity', colorsub_s1final2),
    (s2final2, 'Furniture', colorsub_s2final2),
    (s3final2, 'Households appliances', colorsub_s3final2),
    (s4final2, 'Consumer electronics', colorsub_s4final2),
]
for _row, (_frame, _label, _colours) in enumerate(_fig5_panels, start=1):
    fig5.add_bar(x=_frame['geo\\time'], y=_frame[2019], name=_label,
                 marker_color=_colours, row=_row, col=1)

fig5.update_xaxes(tickangle=45)
fig5.layout.template = 'plotly_white'
fig5.update_layout(
    # explicit space between the two parts of the title
    title=("Price levels for energy, furniture, household appliances and consumer electronics 2019 "
           "(Midnightblue bars represent the highest and lowest values)"),
    showlegend=True
)

fig5.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig5, file=os.path.join(_SAVDIR_, 'figure5.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure5.html)

# ### Table 3: Price levels for personal transport equipment, transport services, communication, restaurants and hotels
#
# We represent [Table 3](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_level_index_for_personal_transport_equipment,_transport_services,_communication_and_restaurants_%26_hotels,_2019,_EU-27%3D100_v2.png).
# The methodology is the same used in the previous sections. Again, there will not be any code referring to [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_(HFCE)) since everything was previously defined.
# Hereby, the codes analysed refer to the consumption of:
# * `"A010701"`, Personal Transport Equipment;
# * `"A010703"`, Transport Services;
# * `"A0108"`, Communication;
# * `"A0111"`, Hotels & Restaurants.
# In[ ]:


def _extreme_colours(frame, base_colour, value_col=2019, highlight='midnightblue'):
    """Return one bar colour per row of *frame*.

    Every row gets *base_colour*, except the rows holding the largest and the
    smallest value of *value_col*, which get *highlight* (first occurrence in
    case of ties).  Positions are computed on a freshly reset index, so
    *frame* may carry any index.  When the column holds no non-missing value,
    nothing is highlighted.
    """
    colours = [base_colour] * len(frame)
    values = frame[value_col].reset_index(drop=True)
    if values.notna().any():
        # plain colour strings, not one-element lists
        colours[values.idxmax()] = highlight
        colours[values.idxmin()] = highlight
    return colours


# Filtering for categories
final3 = tab3_3.loc[tab3_3['ppp_cat'].isin(["A010701", "A010703", "A0108", "A0111"])].copy()

# Explicating categories' codes: any code without an entry below is labelled
# 'Hotels & Restaurants' (after the filter above that can only be "A0111")
_table3_labels = {
    'A010701': 'Personal Transport Equipment',
    'A010703': 'Transport Services',
    'A0108': 'Communication',
}
category3 = [_table3_labels.get(code, 'Hotels & Restaurants') for code in final3['ppp_cat']]

final3['ppp_cat_expl3'] = category3
print(final3.head())


# In[ ]:


# Defining the dataset for each subplot, by filtering the category on the
# frame's own `ppp_cat` column and resetting the index
s1final3 = final3.loc[final3['ppp_cat'] == "A010701"].reset_index(drop=True)
s2final3 = final3.loc[final3['ppp_cat'] == "A010703"].reset_index(drop=True)
s3final3 = final3.loc[final3['ppp_cat'] == "A0108"].reset_index(drop=True)
s4final3 = final3.loc[final3['ppp_cat'] == "A0111"].reset_index(drop=True)


# In[ ]:


# Bar colours of each subplot, with the rows holding the highest and the
# lowest 2019 value coloured differently
## SUBPLOT1
colorsub_s1final3 = _extreme_colours(s1final3, 'slategrey')
## SUBPLOT2
colorsub_s2final3 = _extreme_colours(s2final3, 'dodgerblue')
## SUBPLOT3
colorsub_s3final3 = _extreme_colours(s3final3, 'lightsteelblue')
## SUBPLOT4
colorsub_s4final3 = _extreme_colours(s4final3, 'steelblue')


# In[ ]:


# Plotting: one row per panel; the HFCE panel reuses `final1hcfe` and
# `colorsub_hcfe` prepared for the first table
fig6 = make_subplots(rows=5, cols=1, x_title='Countries', y_title='Values (€)',
                     shared_xaxes=True, shared_yaxes=True)

_fig6_panels = [
    (final1hcfe, 'HCFE', colorsub_hcfe),
    (s1final3, 'Personal Transport Equipment', colorsub_s1final3),
    (s2final3, 'Transport Services', colorsub_s2final3),
    (s3final3, 'Communication', colorsub_s3final3),
    (s4final3, 'Hotels & Restaurants', colorsub_s4final3),
]
for _row, (_frame, _label, _colours) in enumerate(_fig6_panels, start=1):
    fig6.add_bar(x=_frame['geo\\time'], y=_frame[2019], name=_label,
                 marker_color=_colours, row=_row, col=1)

fig6.update_xaxes(tickangle=45)
fig6.layout.template = 'plotly_white'
fig6.update_layout(
    # explicit spaces between the parts of the title
    title=("Price levels for personal transport equipment, transport services, "
           "communication, restaurants and hotels 2019 "
           "(darker bars represent the highest and lowest values)"),
    showlegend=True
)

fig6.show()
# Save the interactive version; `auto_open=True` also opens it in the browser
pio.write_html(fig6, file=os.path.join(_SAVDIR_, 'figure6.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure6.html)

# ### Figure 2: Price convergence
#
# In this section we want to represent the graph of [Figure 2](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_convergence_-_coefficients_of_variation_of_price_level_indices_for_final_household_consumption_expenditure,_2009-2019.png).
#
# This graph depicts the Coefficients of Variation (CV) of the [**PLI**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Price_level_index_%28PLI%29) for total [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_%28HFCE%29) over time. A decreasing coefficient of variation signals a price convergence; an increasing one depicts a divergence.
#
# However, instead of displaying the values of 'All 27', we will use the value for the group (CPC1) made of the [*candidate*](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Candidate_countries) countries (Albania, Montenegro, North Macedonia, Serbia and Turkey) and one [*potential*](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Potential_candidates&redirect=no) candidate country, due to data availability.
#
# The script for extracting data is similar to the preceding sections.
# So, we get data through `eurostat.get_data_df`, selecting _prc_ppp_conv_; we pick the values referring to the Coefficient of Variation for [**PLI**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Price_level_index_%28PLI%29) with `"CV_PLI"`, and we filter the data for [**HFCE**](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Household_final_consumption_expenditure_%28HFCE%29) with `"E011"`.
# After that, we select the group countries used [here](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Price_convergence_-_coefficients_of_variation_of_price_level_indices_for_final_household_consumption_expenditure,_2009-2019.png).

# In[ ]:


# Retrieving data
eu_conv = eurostat.get_data_df('prc_ppp_conv')
eu_conv_2 = eu_conv.loc[eu_conv['statinfo'] == "CV_PLI"]
final_conv = eu_conv_2.loc[eu_conv_2['ppp_cat'] == "E011"]

# Country groups to plot: code used in the `geo\time` column -> label used in the chart.
# The insertion order of this mapping is also the order of the traces in the figure.
conv_groups = {'EA19': 'EA19', 'EU15': 'EU15', 'EU27_2020': 'EU27', 'CPC1': 'CPC1'}
final_conv2 = final_conv.loc[final_conv['geo\\time'].isin(list(conv_groups))]
print(final_conv2.head())


# Since we want to replicate the above line-chart, we transpose the dataset by `.T`.
# This operation will allow us to use years as `x-axis ticks`.
#
# Before transposing, we index the rows by their `geo\time` code, so that the transposed columns
# are named after the country groups rather than after the row positions in the downloaded table
# (which are not stable across dataset updates):

# In[ ]:


# Years shown in the chart (most recent first); reused below for the `Years` column
conv_years = [2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009]

final_conv2_2 = final_conv2.set_index('geo\\time')[conv_years]
final_conv2_t = final_conv2_2.T
print(final_conv2_t.head())


# Before making the chart, we rename the variables with the labels used in the chart, we reset the indexes and we add a column for assigning the years as a variable:

# In[ ]:


# Rebind instead of renaming in place: `final_conv2_t` is derived from a slice of `final_conv2`
final_conv2_t = final_conv2_t.rename(columns=conv_groups)
final_conv2_t2 = final_conv2_t.reset_index(drop=True)
# Rows are still in `conv_years` order after the transpose, so the list lines up row by row
final_conv2_t2['Years'] = conv_years
print(final_conv2_t2)


# We finally plot the graph:

# In[ ]:


# Plotting
fig7 = go.Figure()

# One line per country group, in the order declared in `conv_groups`
for group_label in conv_groups.values():
    fig7.add_trace(go.Scatter(x=final_conv2_t2['Years'], y=final_conv2_t2[group_label],
                              mode='lines+markers',
                              name=group_label))

fig7.layout.template = 'plotly_white'
fig7.update_xaxes(title = "Years", showline=True, linewidth=2, linecolor='black', tickangle = 45,
                  tickmode='linear', showgrid=False)
fig7.update_yaxes(title = "Coefficient of Variation", showline=True, linewidth=2, linecolor='black',
                  showgrid=False)
fig7.update_layout(
    title = "Price Convergence (CVs)",
    showlegend = True)

fig7.show()
pio.write_html(fig7, file=os.path.join(_SAVDIR_,'figure7.html'), auto_open=True)


#
# [**Interactive plot available here**](https://eurostat.github.io/statistics-coded/economy/comparative-price-consumer-goods-services/figure7.html)

# In[ ]: