#!/usr/bin/env python
# coding: utf-8

# In[59]:


# Render matplotlib figures inline, directly below the cell that creates them.
# NOTE: get_ipython() is not imported anywhere in this file; it is only available
# inside an IPython/Jupyter session, so this line fails in a plain Python run.
get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

# Load the energy indicator CSV export into the "energy" data frame.
#   thousands=","  -> numbers written like "1,234.5" are parsed as numeric values
#   na_values=":"  -> a lone ":" in a cell is read as a missing value (NaN)
#   dtype          -> TIME as integer, Value as float (float can hold the NaNs)
# The builtin int/float are used instead of np.int/np.float: those aliases were
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where they raise
# AttributeError. They were plain aliases of the builtins, so dtypes are unchanged.
filename = "nrg_ind_334a_1_Data.csv"
energy = pd.read_csv(
    filename,
    encoding="ISO-8859-1",
    thousands=",",
    na_values=":",
    dtype={"TIME": int, "Value": float},
)

# preview the first 12 rows of the data frame
energy.head(12)


# In[60]:


# dimensions of the "energy" data frame as a (rows, columns) tuple
energy.shape


# In[61]:


# summary of "energy": data type of each column, number of non-missing values, memory usage
energy.info()


# In[62]:


# distinct indicator names that occur in the "INDIC_NRG" column
energy["INDIC_NRG"].unique()


# In[63]:


# distinct geographic entities that occur in the "GEO" column
energy["GEO"].unique()


# In[64]:


# Boolean selection on "energy": keep the rows that are simultaneously
#   - the European Union (28 countries) aggregate,
#   - the "Primary energy consumption" indicator,
#   - expressed in million tonnes of oil equivalent.
eu_crit1 = energy["GEO"].eq("European Union (28 countries)")
eu_crit2 = energy["INDIC_NRG"].eq("Primary energy consumption")
eu_crit3 = energy["UNIT"].eq("Million tonnes of oil equivalent (TOE)")
eu_crit = eu_crit1 & eu_crit2 & eu_crit3
eu_energy = energy[eu_crit]
eu_energy


# In[65]:


# figure canvas: 10 x 6 inches at 113 dpi, with a single axes
my_dpi = 113
fig, ax = plt.subplots(figsize=(10, 6), dpi=my_dpi)

# line with round markers over a lightly shaded area; y axis fixed to 0..2000
ax.set_ylim(0, 2000)
ax.fill_between("TIME", "Value", data=eu_energy, color="skyblue", alpha=0.2)
ax.plot(
    "TIME", "Value", data=eu_energy,
    marker='o', markerfacecolor='blue', markersize=12,
    color='skyblue', linewidth=4,
)
sns.despine()

# write each value 50 units above its data point, horizontally centred
for row_label in eu_energy.index:
    year = eu_energy.at[row_label, "TIME"]
    mtoe = eu_energy.at[row_label, "Value"]
    ax.text(year, mtoe + 50, str(mtoe), ha='center', va='bottom')

# title, source note (placed as the x label) and y label
ax.set_title(
    "Primary Energy Consumption - European Union (28 countries) 2006 - 2015\n",
    loc='center', fontsize=12, fontweight=0, color='black',
)
ax.set_xlabel(
    "\nSource: Eurostat Database, http://ec.europa.eu/eurostat/data/database, Author: Bjoern Springer",
    fontsize="small",
)
ax.set_ylabel("million tonnes of oil equivalent (MTOE)")

# write the figure to a .png, trimming the surrounding whitespace
fig.savefig("primary_energy_01.png", bbox_inches="tight")


# In[66]:


# Load the population CSV export into the "demo" data frame.
#   thousands=","  -> numbers written like "1,234,567" are parsed as numeric values
#   na_values=":"  -> a lone ":" in a cell is read as a missing value (NaN)
#   dtype          -> TIME as integer, Value as float (float can hold the NaNs)
# The builtin int/float are used instead of np.int/np.float: those aliases were
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where they raise
# AttributeError. They were plain aliases of the builtins, so dtypes are unchanged.
filename = "demo_pjan_1_Data.csv"
demo = pd.read_csv(
    filename,
    encoding="ISO-8859-1",
    thousands=",",
    na_values=":",
    dtype={"TIME": int, "Value": float},
)

# preview the first 12 rows of the data frame
demo.head(12)


# In[67]:


# dimensions of the "demo" data frame as a (rows, columns) tuple
demo.shape


# In[68]:


# summary of "demo": data type of each column, number of non-missing values, memory usage
demo.info()


# In[69]:


# Boolean selection on "demo": keep only the European Union (28 countries) rows
eu_demo_crit1 = demo["GEO"].eq("European Union (28 countries)")
eu_demo = demo[eu_demo_crit1]
eu_demo


# In[70]:


# reduce eu_demo to the two columns used further on: "TIME" and "Value"
eu_demo_tv = eu_demo.loc[:, ["TIME", "Value"]]
eu_demo_tv


# In[71]:


# reduce eu_energy to the same two columns: "TIME" and "Value"
eu_energy_tv = eu_energy.loc[:, ["TIME", "Value"]]
eu_energy_tv


# In[72]:


# Inner join of the energy and population subsets on "TIME": only years present
# in both frames are kept. The clashing "Value" columns are renamed to
# "Value_energy" and "Value_demo" through the suffixes.
eu_pec_pc = eu_energy_tv.merge(
    eu_demo_tv,
    on='TIME',
    how='inner',
    suffixes=('_energy', '_demo'),
)
eu_pec_pc


# In[73]:


# New column "pecpc": primary energy consumption per capita.
# Value_energy is in million TOE, so the ratio is scaled by 1,000,000
# to express the result in TOE per person.
eu_pec_pc["pecpc"] = eu_pec_pc["Value_energy"].div(eu_pec_pc["Value_demo"]).mul(1000000)
eu_pec_pc


# In[74]:


# figure canvas: 10 x 6 inches at 113 dpi, with a single axes
my_dpi = 113
fig, ax = plt.subplots(figsize=(10, 6), dpi=my_dpi)

# line with round markers over a lightly shaded area; y axis fixed to 0..4
ax.set_ylim(0, 4)
ax.fill_between("TIME", "pecpc", data=eu_pec_pc, color="skyblue", alpha=0.2)
ax.plot(
    "TIME", "pecpc", data=eu_pec_pc,
    marker='o', markerfacecolor='blue', markersize=12,
    color='skyblue', linewidth=4,
)
sns.despine()

# write each value (rounded to 2 decimals) 0.1 above its data point
for row_label in eu_pec_pc.index:
    year = eu_pec_pc.at[row_label, "TIME"]
    toe_per_person = round(eu_pec_pc.at[row_label, "pecpc"], 2)
    ax.text(year, toe_per_person + 0.1, str(toe_per_person), ha='center', va='bottom')

# title, source note (placed as the x label) and y label
ax.set_title(
    "Primary Energy Consumption per Capita - European Union (28 countries) 2007 - 2015\n",
    loc='center', fontsize=12, fontweight=0, color='black',
)
ax.set_xlabel(
    "\nSource: Eurostat Database, http://ec.europa.eu/eurostat/data/database, Author: Bjoern Springer",
    fontsize="small",
)
ax.set_ylabel("tonnes of oil equivalent (TOE) per person")

# write the figure to a .png, trimming the surrounding whitespace
fig.savefig("primary_energy_02.png", bbox_inches="tight")


# In[75]:


# Boolean selection on "energy" for the country comparison. A row is kept when it
#   - is neither the "European Union (28 countries)" nor the "Euro area (19 countries)" aggregate,
#   - belongs to the "Primary energy consumption" indicator,
#   - is expressed in million tonnes of oil equivalent,
#   - refers to the year 2015,
#   - has a Value greater than 0 (missing values fail this comparison and are dropped).
countries_crit1 = energy["GEO"].ne("European Union (28 countries)")
countries_crit2 = energy["INDIC_NRG"].eq("Primary energy consumption")
countries_crit3 = energy["UNIT"].eq("Million tonnes of oil equivalent (TOE)")
countries_crit4 = energy["TIME"].eq(2015)
countries_crit5 = energy["GEO"].ne("Euro area (19 countries)")
countries_crit6 = energy["Value"].gt(0)
countries_crit = (
    countries_crit1 & countries_crit2 & countries_crit3
    & countries_crit4 & countries_crit5 & countries_crit6
)
countries_energy = energy[countries_crit]

# shorten the long official country names, then order by Value, largest first
countries_energy = (
    countries_energy
    .replace('Germany (until 1990 former territory of the FRG)', 'Germany')
    .replace('Former Yugoslav Republic of Macedonia, the', 'Rep. of Macedonia')
    .replace('Kosovo (under United Nations Security Council Resolution 1244/99)', 'Kosovo')
    .replace('Bosnia and Herzegovina', 'Bosnia & Herzeg.')
    .sort_values("Value", ascending=False)
)

countries_energy


# In[76]:


# figure canvas: 10 x 10 inches at 113 dpi, with a single axes
my_dpi = 113
fig, ax = plt.subplots(figsize=(10, 10), dpi=my_dpi)

# horizontal bars, one per country; the x axis is hidden because every bar
# gets its own value label below
sns.barplot(
    y=countries_energy["GEO"],
    x=countries_energy["Value"],
    data=countries_energy,
    palette="cubehelix",
    ax=ax,
)
sns.despine()
ax.spines['bottom'].set_visible(False)
ax.xaxis.set_visible(False)

# label each bar with its length, 2 units right of the bar end, vertically centred
for bar in ax.patches:
    bar_length = bar.get_width()
    bar_middle = bar.get_y() + bar.get_height()/2
    ax.text(bar_length + 2, bar_middle, bar_length, ha='left', va='center')

# title, empty axis labels, and the source note as free text at data position (0, 38)
ax.set_title(
    "Europe's Biggest Energy Consumers 2015\nPrimary Energy Consumption (Million Tonnes of Oil Equivalent (MTOE))\n",
    loc='left', fontsize=12, fontweight=0, color='black',
)
ax.set_xlabel("")
ax.set_ylabel("")
ax.text(
    0, 38,
    "\nSource: Eurostat Database, http://ec.europa.eu/eurostat/data/database, Author: Bjoern Springer",
    fontsize="small",
)

# write the figure to a .png, trimming the surrounding whitespace
fig.savefig("primary_energy_03.png", bbox_inches="tight")


# In[77]:


# Boolean selection on "demo" for the country comparison. A row is kept when
#   - its GEO label does not contain "Euro" (missing labels count as not containing it),
#   - it refers to the year 2015,
#   - its Value is greater than 0.
countries_demo_crit1 = ~demo["GEO"].str.contains("Euro", na=False)
countries_demo_crit2 = demo["TIME"].eq(2015)
countries_demo_crit3 = demo["Value"].gt(0)
countries_demo_crit = countries_demo_crit1 & countries_demo_crit2 & countries_demo_crit3
countries_demo = demo[countries_demo_crit]

# shorten the long official country names (same replacements as for
# countries_energy), then order by Value, largest first
countries_demo = (
    countries_demo
    .replace('Germany (until 1990 former territory of the FRG)', 'Germany')
    .replace('Former Yugoslav Republic of Macedonia, the', 'Rep. of Macedonia')
    .replace('Kosovo (under United Nations Security Council Resolution 1244/99)', 'Kosovo')
    .replace('Bosnia and Herzegovina', 'Bosnia & Herzeg.')
    .sort_values("Value", ascending=False)
)

countries_demo


# In[78]:


# reduce countries_demo to the two columns used further on: "GEO" and "Value"
countries_demo_gv = countries_demo.loc[:, ["GEO", "Value"]]
countries_demo_gv


# In[79]:


# reduce countries_energy to the same two columns: "GEO" and "Value"
countries_energy_gv = countries_energy.loc[:, ["GEO", "Value"]]
countries_energy_gv


# In[80]:


# Inner join of the per-country energy and population subsets on "GEO": only
# countries present in both frames are kept. The clashing "Value" columns are
# renamed to "Value_energy" and "Value_demo" through the suffixes.
countries_pec_pc = countries_energy_gv.merge(
    countries_demo_gv,
    on='GEO',
    how='inner',
    suffixes=('_energy', '_demo'),
)
countries_pec_pc


# In[81]:


# New column "pecpc": primary energy consumption per capita.
# Value_energy is in million TOE, so the ratio is scaled by 1,000,000
# to express the result in TOE per person. Rows are then ordered by it, largest first.
countries_pec_pc["pecpc"] = (
    countries_pec_pc["Value_energy"].div(countries_pec_pc["Value_demo"]).mul(1000000)
)
countries_pec_pc = countries_pec_pc.sort_values("pecpc", ascending=False)
countries_pec_pc


# In[82]:


# figure canvas: 10 x 10 inches at 113 dpi, with a single axes
my_dpi = 113
fig, ax = plt.subplots(figsize=(10, 10), dpi=my_dpi)

# horizontal bars, one per country; the x axis is hidden because every bar
# gets its own value label below
sns.barplot(
    y=countries_pec_pc["GEO"],
    x=countries_pec_pc["pecpc"],
    data=countries_pec_pc,
    palette="cubehelix",
    ax=ax,
)
sns.despine()
ax.spines['bottom'].set_visible(False)
ax.xaxis.set_visible(False)

# label each bar with its length rounded to 2 decimals, 0.2 right of the bar end
for bar in ax.patches:
    bar_length = bar.get_width()
    bar_middle = bar.get_y() + bar.get_height()/2
    ax.text(bar_length + .2, bar_middle, round(bar_length, 2), ha='left', va='center')

# title, empty axis labels, and the source note as free text at data position (0, 37)
ax.set_title(
    "Europe's Biggest Energy Consumers 2015\nPrimary Energy Consumption per Capita (Tonnes of Oil Equivalent (TOE) per Person)\n",
    loc='left', fontsize=12, fontweight=0, color='black',
)
ax.set_xlabel("")
ax.set_ylabel("")
ax.text(
    0, 37,
    "\nSource: Eurostat Database, http://ec.europa.eu/eurostat/data/database, Author: Bjoern Springer",
    fontsize="small",
)

# write the figure to a .png, trimming the surrounding whitespace
fig.savefig("primary_energy_04.png", bbox_inches="tight")


# In[ ]: