import pandas as pd
# Jan: Alle imports außer pandas und logging werden nicht genutzt
import numpy as np
import os.path
import yaml # http://pyyaml.org/, pip install pyyaml, conda install pyyaml
import json
import subprocess
import sqlite3
%matplotlib inline
import logging
# Notebook-level logger; records at INFO and above pass its level check.
logger = logging.getLogger('notebook')
logger.setLevel('INFO')

# Timestamped record layout, applied to the first handler of the root logger.
nb_root_logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                              datefmt='%d %b %Y %H:%M:%S')
# The root logger only has a handler if the host environment installed one.
# Add a stream handler otherwise, so that handlers[0] cannot raise IndexError.
if not nb_root_logger.handlers:
    nb_root_logger.addHandler(logging.StreamHandler())
nb_root_logger.handlers[0].setFormatter(formatter)
# Name of the aggregated capacity table and its location relative to the
# current working directory.
data_file = 'aggregated_capacity.csv'
# os.path.join builds the path with the platform's separator instead of a
# hard-coded '/'.
filepath = os.path.join('data_final', data_file)

# The first CSV column becomes the row index of the frame.
data = pd.read_csv(filepath, index_col=0)
# Notebook display: preview of the first rows.
data.head()
In the following, national generation capacities are compared to each other at the different technology levels.
# Review note (Frauke): having this side-by-side comparison of the reported
# figures is very useful, but the x-axis is unreadable because it carries so
# much information. Suggestion: one plot per country, e.g. via a function.

# Summed capacity per (country, year, source) with one column per technology,
# restricted to the rows flagged as technology level 1.
# NOTE(review): the explicit `== True` is kept because the dtype of the flag
# column is not visible here -- confirm it is strictly boolean before
# simplifying the mask.
pivot_capacity_level1 = pd.pivot_table(
    data[data.technology_level_1 == True],
    index=['country', 'year', 'source'],
    columns='technology',
    values='capacity',
    # The string 'sum' selects pandas' own sum aggregation; passing the Python
    # builtin `sum` triggers a FutureWarning in recent pandas releases.
    aggfunc='sum',
    margins=False,
)

# Stacked bars: one bar per (country, year, source), one segment per technology.
pivot_capacity_plot = pivot_capacity_level1.plot(
    kind='bar', stacked=True, legend=True, figsize=(12, 6))
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
pivot_capacity_plot.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
# Fixed y-range; the same limits are used for the level 2 and level 3 charts.
pivot_capacity_plot.set_ylim(0, 250000)
# Notebook display of the axes object and of the pivot table.
pivot_capacity_plot
pivot_capacity_level1
# Review note (Frauke): even more information here; again, one chart per
# country would be preferable.
# Review note (Frauke): specify colours explicitly so that none is used twice.

# Summed capacity per (country, year, source) with one column per technology,
# restricted to the rows flagged as technology level 2.
# NOTE(review): the explicit `== True` is kept because the dtype of the flag
# column is not visible here -- confirm it is strictly boolean before
# simplifying the mask.
pivot_capacity_level2 = pd.pivot_table(
    data[data.technology_level_2 == True],
    index=['country', 'year', 'source'],
    columns='technology',
    values='capacity',
    # The string 'sum' selects pandas' own sum aggregation; passing the Python
    # builtin `sum` triggers a FutureWarning in recent pandas releases.
    aggfunc='sum',
    margins=False,
)

# Stacked bars: one bar per (country, year, source), one segment per technology.
pivot_capacity_plot = pivot_capacity_level2.plot(
    kind='bar', stacked=True, legend=True, figsize=(12, 6))
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
pivot_capacity_plot.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
# Fixed y-range; the same limits are used for the level 1 and level 3 charts.
pivot_capacity_plot.set_ylim(0, 250000)
# Notebook display of the axes object and of the pivot table.
pivot_capacity_plot
pivot_capacity_level2
# Review note (Frauke): the same remarks apply here as for the technology
# level 2 chart.

# Summed capacity per (country, year, source) with one column per technology,
# restricted to the rows flagged as technology level 3.
# NOTE(review): the explicit `== True` is kept because the dtype of the flag
# column is not visible here -- confirm it is strictly boolean before
# simplifying the mask.
pivot_capacity_level3 = pd.pivot_table(
    data[data.technology_level_3 == True],
    index=['country', 'year', 'source'],
    columns='technology',
    values='capacity',
    # The string 'sum' selects pandas' own sum aggregation; passing the Python
    # builtin `sum` triggers a FutureWarning in recent pandas releases.
    aggfunc='sum',
    margins=False,
)

# Stacked bars: one bar per (country, year, source), one segment per technology.
pivot_capacity_plot = pivot_capacity_level3.plot(
    kind='bar', stacked=True, legend=True, figsize=(12, 6))
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
pivot_capacity_plot.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
# Fixed y-range; the same limits are used for the level 1 and level 2 charts.
pivot_capacity_plot.set_ylim(0, 250000)
# Notebook display of the axes object and of the pivot table.
pivot_capacity_plot
pivot_capacity_level3
In the following, the installed capacities at the different technology levels are compared with each other. In every case, the total across all technologies within one technology level should match the totals of the other technology levels.
# Review note (Frauke): show capacities without decimal places; that makes the
# table easier to read.
# Review note (Frauke): the idea behind the chart is good, but as it stands
# hardly anything can be read from it -- only black bars of differing height.

# Keys that identify one total; shared by every technology level.
grouping_keys = ['capacity_definition', 'source', 'year', 'type', 'country']

# Summed capacity per key combination, separately for the rows flagged at each
# technology level. Each result is a one-column frame named 'capacity'.
capacity_total_0 = pd.DataFrame(
    data[data['technology_level_0'] == True].groupby(grouping_keys)['capacity'].sum())
capacity_total_1 = pd.DataFrame(
    data[data['technology_level_1'] == True].groupby(grouping_keys)['capacity'].sum())
capacity_total_2 = pd.DataFrame(
    data[data['technology_level_2'] == True].groupby(grouping_keys)['capacity'].sum())
capacity_total_3 = pd.DataFrame(
    data[data['technology_level_3'] == True].groupby(grouping_keys)['capacity'].sum())

# Left-join levels 1-3 onto level 0 by index, so only key combinations present
# at level 0 are kept. Each column is renamed before merging, which avoids
# depending on the automatic '_x'/'_y' suffixes pandas adds on name clashes.
capacity_total_comparison = capacity_total_0.rename(
    columns={'capacity': 'technology level 0'})
for level, level_total in ((1, capacity_total_1),
                           (2, capacity_total_2),
                           (3, capacity_total_3)):
    capacity_total_comparison = pd.merge(
        capacity_total_comparison,
        level_total.rename(columns={'capacity': 'technology level {}'.format(level)}),
        left_index=True,
        right_index=True,
        how='left',
    )

# DataFrame.sortlevel was removed from pandas; sort_index(level=...) is the
# equivalent call.
capacity_total_comparison = capacity_total_comparison.sort_index(level=['country', 'year'])

# Grouped (non-stacked) bars: one group per index row, one bar per level.
capacity_total_pivot_plot = capacity_total_comparison.plot(
    kind='bar', stacked=False, legend=True, figsize=(12, 6))
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
capacity_total_pivot_plot.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
#capacity_total_pivot_plot.set_ylim(0,250000)
# Notebook display of the axes object and of the comparison table.
capacity_total_pivot_plot
capacity_total_comparison
The following identifies differences between the technology levels for each country, source, and year. In general, the differences between the technology levels should be zero, but they may be non-zero, in particular for ENTSO-E data.
# Review note (Frauke): do you have a guess as to why this differs for
# ENTSO-E? If so, feel free to write it down as a comment.

# Column-wise first difference: each technology-level column minus the column
# to its left (the first column therefore has no predecessor).
capacity_total_difference = capacity_total_comparison.diff(periods=1, axis=1)

# Keep only the rows in which at least one of the level 1-3 differences
# deviates from zero by more than 0.01 in either direction.
checked_levels = ['technology level 1', 'technology level 2', 'technology level 3']
deviates = (capacity_total_difference[checked_levels].abs() > 0.01).any(axis=1)
capacity_total_difference = capacity_total_difference[deviates]
# Notebook display of the remaining rows.
capacity_total_difference
# Review note (Frauke): a comment explaining why this is done for ENTSO-E
# would be helpful.

# Reshape the wide comparison table to long format: the technology-level
# columns are stacked into rows, the index is turned back into columns, and
# the two automatically named columns ('level_5' and 0) get descriptive names.
# Note that this rebinds capacity_total_comparison to the long-format frame.
capacity_total_comparison = (
    pd.DataFrame(capacity_total_comparison.stack())
    .reset_index()
    .rename(columns={'level_5': 'technology_level', 0: 'capacity'})
)

capacity_total_pivot = pd.pivot_table(
    # select specific country for comparison
    # capacity_total_comparison[capacity_total_comparison['country']=='SK'],
    # select specific source for comparison
    capacity_total_comparison[capacity_total_comparison['source'] == 'entsoe'],
    index=['country', 'year', 'source'],
    columns='technology_level',
    values='capacity',
    # The string 'sum' selects pandas' own sum aggregation; passing the Python
    # builtin `sum` triggers a FutureWarning in recent pandas releases.
    aggfunc='sum',
    margins=False,
)

# Grouped (non-stacked) bars: one group per (country, year, source), one bar
# per technology level.
capacity_total_pivot_plot = capacity_total_pivot.plot(
    kind='bar', stacked=False, legend=True, figsize=(12, 6))
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
capacity_total_pivot_plot.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
#capacity_total_pivot_plot.set_ylim(0,250000)
# Notebook display of the axes object and of the pivot table.
capacity_total_pivot_plot
capacity_total_pivot