#!/usr/bin/env python
# coding: utf-8
"""Plot primary energy consumption (total and per capita) from Eurostat CSV exports.

Reads ``nrg_ind_334a_1_Data.csv`` (energy) and ``demo_pjan_1_Data.csv``
(population) from the working directory and writes four charts,
``primary_energy_01.png`` to ``primary_energy_04.png``.
"""

# In[59]:

# see plots in Jupyter; get_ipython() only exists inside IPython, so skip the
# magic when this exported notebook is run as a plain Python script
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

# figure resolution shared by all charts
my_dpi = 113

# x-label / footer text shared by all charts
SOURCE_NOTE = (
    "\nSource: Eurostat Database, http://ec.europa.eu/eurostat/data/database, "
    "Author: Bjoern Springer"
)

# long Eurostat country labels -> short labels used on the bar charts
COUNTRY_SHORT_NAMES = {
    'Germany (until 1990 former territory of the FRG)': 'Germany',
    'Former Yugoslav Republic of Macedonia, the': 'Rep. of Macedonia',
    'Kosovo (under United Nations Security Council Resolution 1244/99)': 'Kosovo',
    'Bosnia and Herzegovina': 'Bosnia & Herzeg.',
}


def read_eurostat_csv(filename):
    """Read a Eurostat CSV export into a DataFrame.

    Uses comma as thousands separator and ":" as the missing-value marker.
    The numeric columns get builtin ``int``/``float`` dtypes; the former
    ``np.int``/``np.float`` aliases were removed in NumPy 1.24.
    """
    return pd.read_csv(
        filename,
        encoding="ISO-8859-1",
        thousands=",",
        na_values=":",
        dtype={"TIME": int, "Value": float},
    )


def shorten_country_names(frame):
    """Return a copy of *frame* with the long country labels replaced by short ones."""
    return frame.replace(COUNTRY_SHORT_NAMES)


def plot_time_series(data, column, y_max, label_offset, title, ylabel, outfile,
                     decimals=None):
    """Draw a filled line chart of ``data[column]`` over ``data["TIME"]`` and save it.

    Each point is annotated with its value, placed *label_offset* above the
    point; the value is rounded to *decimals* places first when *decimals* is
    given. The figure is written to *outfile*.
    """
    plt.figure(figsize=(10, 6), dpi=my_dpi)

    # make plot
    plt.ylim(0, y_max)
    plt.fill_between("TIME", column, data=data, color="skyblue", alpha=0.2)
    plt.plot("TIME", column, data=data, marker='o', markerfacecolor='blue',
             markersize=12, color='skyblue', linewidth=4)
    sns.despine()

    # add values to the coordinates
    for index in data.index:
        time = data.loc[index, "TIME"]
        value = data.loc[index, column]
        if decimals is not None:
            value = round(value, decimals)
        plt.text(time, value + label_offset, str(value), ha='center', va='bottom')

    # add titles, labels, text
    plt.title(title, loc='center', fontsize=12, fontweight=0, color='black')
    plt.xlabel(SOURCE_NOTE, fontsize="small")
    plt.ylabel(ylabel)

    # save as .png with less whitespace around plot
    plt.savefig(outfile, bbox_inches="tight")


def plot_country_ranking(data, column, label_offset, title, source_y, outfile,
                         decimals=None):
    """Draw a horizontal bar chart of ``data[column]`` per ``data["GEO"]`` and save it.

    Each bar is labelled with its length, placed *label_offset* right of the
    bar end; the length is rounded to *decimals* places first when *decimals*
    is given. The source note is placed at y-position *source_y* (in data
    coordinates). Returns the Axes and writes the figure to *outfile*.
    """
    plt.figure(figsize=(10, 10), dpi=my_dpi)

    # make plot
    ax = sns.barplot(y="GEO", x=column, data=data, palette="cubehelix")
    sns.despine()
    ax.spines['bottom'].set_visible(False)
    ax.xaxis.set_visible(False)

    # attach a text label right of each bar displaying its length
    for rect in ax.patches:
        width = rect.get_width()
        label = width if decimals is None else round(width, decimals)
        ax.text(width + label_offset, rect.get_y() + rect.get_height() / 2, label,
                ha='left', va='center')

    # add titles, labels, text
    plt.title(title, loc='left', fontsize=12, fontweight=0, color='black')
    plt.xlabel("")
    plt.ylabel("")
    plt.text(0, source_y, SOURCE_NOTE, fontsize="small")

    # save as .png with less whitespace around plot
    plt.savefig(outfile, bbox_inches="tight")
    return ax


# make pandas dataframe called "energy", comma as thousands separator, ":" for na_values
# specify dtype for the numeric columns (TIME and Value)
filename = "nrg_ind_334a_1_Data.csv"
energy = read_eurostat_csv(filename)

# see head (first rows of data frame)
energy.head(12)


# In[60]:

# get dimensions of data frame (rows, columns)
energy.shape


# In[61]:

# list data type of each column, number of non-missing values, memory usage
energy.info()


# In[62]:

# see which values in "INDIC_NRG" column exist
energy.INDIC_NRG.unique()


# In[63]:

# see which values in "GEO" column exist
energy.GEO.unique()


# In[64]:

# filter dataframe by means of boolean indexing / boolean selection:
# european union, Primary energy consumption, MTOE
eu_crit = (
    (energy.GEO == "European Union (28 countries)")
    & (energy.INDIC_NRG == "Primary energy consumption")
    & (energy.UNIT == "Million tonnes of oil equivalent (TOE)")
)
eu_energy = energy.loc[eu_crit]
eu_energy


# In[65]:

plot_time_series(
    eu_energy,
    column="Value",
    y_max=2000,
    label_offset=50,
    title="Primary Energy Consumption - European Union (28 countries) 2006 - 2015\n",
    ylabel="million tonnes of oil equivalent (MTOE)",
    outfile="primary_energy_01.png",
)


# In[66]:

# make pandas dataframe called "demo", comma as thousands separator, ":" for na_values
# specify type for the numeric columns (TIME and Value)
filename = "demo_pjan_1_Data.csv"
demo = read_eurostat_csv(filename)

# see head
demo.head(12)


# In[67]:

# get dimensions of data frame
demo.shape


# In[68]:

# list data type of each column, number of non-missing values, memory usage
demo.info()


# In[69]:

# filter dataframe by means of boolean indexing / boolean selection: european union
eu_demo = demo.loc[demo.GEO == "European Union (28 countries)"]
eu_demo


# In[70]:

# from eu_demo, only show columns: "TIME" and "Value"
eu_demo_tv = eu_demo[["TIME", "Value"]]
eu_demo_tv


# In[71]:

# from eu_energy, only show columns: "TIME" and "Value"
eu_energy_tv = eu_energy[["TIME", "Value"]]
eu_energy_tv


# In[72]:

# merge "eu_energy_tv" and "eu_demo_tv" along the "TIME" column,
# only shared "TIME" values ("inner") -> 2007 - 2015
eu_pec_pc = pd.merge(eu_energy_tv, eu_demo_tv, on='TIME', how='inner',
                     suffixes=('_energy', '_demo'))
eu_pec_pc


# In[73]:

# add column "pecpc" showing primary energy consumption per capita;
# energy is in million TOE, so scale by 1,000,000 to get TOE per person
eu_pec_pc["pecpc"] = eu_pec_pc["Value_energy"] / eu_pec_pc["Value_demo"] * 1000000
eu_pec_pc


# In[74]:

plot_time_series(
    eu_pec_pc,
    column="pecpc",
    y_max=4,
    label_offset=0.1,
    title="Primary Energy Consumption per Capita - European Union (28 countries) 2007 - 2015\n",
    ylabel="tonnes of oil equivalent (TOE) per person",
    outfile="primary_energy_02.png",
    decimals=2,
)


# In[75]:

# filter "energy" dataframe by means of boolean indexing / boolean selection:
# single countries, Primary energy consumption > 0, MTOE, 2015
countries_crit = (
    (energy.GEO != "European Union (28 countries)")
    & (energy.INDIC_NRG == "Primary energy consumption")
    & (energy.UNIT == "Million tonnes of oil equivalent (TOE)")
    & (energy.TIME == 2015)
    & (energy.GEO != "Euro area (19 countries)")
    & (energy.Value > 0)
)
countries_energy = energy.loc[countries_crit]

# replace long country names, sort by value (descending)
countries_energy = shorten_country_names(countries_energy).sort_values(
    "Value", ascending=False)
countries_energy


# In[76]:

ax = plot_country_ranking(
    countries_energy,
    column="Value",
    label_offset=2,
    title=("Europe's Biggest Energy Consumers 2015\n"
           "Primary Energy Consumption (Million Tonnes of Oil Equivalent (MTOE))\n"),
    source_y=38,
    outfile="primary_energy_03.png",
)


# In[77]:

# filter "demo" dataframe by means of boolean indexing / boolean selection:
# single countries (no GEO label containing "Euro"), 2015, Value > 0
countries_demo_crit = (
    ~demo['GEO'].str.contains("Euro", na=False)
    & (demo.TIME == 2015)
    & (demo.Value > 0)
)
countries_demo = demo.loc[countries_demo_crit]

# replace long country names (as used in countries_energy), sort by value (descending)
countries_demo = shorten_country_names(countries_demo).sort_values(
    "Value", ascending=False)
countries_demo


# In[78]:

# from countries_demo, only show columns: "GEO" and "Value"
countries_demo_gv = countries_demo[["GEO", "Value"]]
countries_demo_gv


# In[79]:

# from countries_energy, only show columns: "GEO" and "Value"
countries_energy_gv = countries_energy[["GEO", "Value"]]
countries_energy_gv


# In[80]:

# merge "countries_energy_gv" and "countries_demo_gv" along the "GEO" column,
# only shared "GEO" values ("inner")
countries_pec_pc = pd.merge(countries_energy_gv, countries_demo_gv, on='GEO',
                            how='inner', suffixes=('_energy', '_demo'))
countries_pec_pc


# In[81]:

# add column "pecpc" showing primary energy consumption per capita;
# energy is in million TOE, so scale by 1,000,000 to get TOE per person
countries_pec_pc["pecpc"] = (
    countries_pec_pc["Value_energy"] / countries_pec_pc["Value_demo"] * 1000000
)
countries_pec_pc = countries_pec_pc.sort_values("pecpc", ascending=False)
countries_pec_pc


# In[82]:

ax = plot_country_ranking(
    countries_pec_pc,
    column="pecpc",
    label_offset=.2,
    title=("Europe's Biggest Energy Consumers 2015\n"
           "Primary Energy Consumption per Capita "
           "(Tonnes of Oil Equivalent (TOE) per Person)\n"),
    source_y=37,
    outfile="primary_energy_04.png",
    decimals=2,
)


# In[ ]: