#!/usr/bin/env python # coding: utf-8 # # # # #
# Conventional Power Plants: Power Plants in Europe # #
This Notebook is part of the Conventional Power Plants Datapackage of Open Power System Data. #
# # Table of Contents # * [1. Script setup](#1.-Script-setup) # * [2. Settings](#2.-Settings) # * [2.1 Choose download option](#2.1-Choose-download-option) # * [2.2 Definition of national data sources](#2.2-Definition-of-national-data-sources) # * [2.3 Download function](#2.3-Download-function) # * [2.4 Import function](#2.4-Import-function) # * [2.5 Other functions](#2.5-Other-functions) # * [2.6 Definition of harmonized output scheme](#2.6-Definition-of-harmonized-output-scheme) # * [3. Download of data](#3.-Download-of-data) # * [4. Data processing per country](#4.-Data-processing-per-country) # * [4.1 Belgium BE](#4.1-Belgium-BE) # * [4.1.1 Data import](#4.1.1-Data-import) # * [4.1.2 Translation and harmonization of columns](#4.1.2-Translation-and-harmonization-of-columns) # * [4.1.3 Definition of generation type](#4.1.3-Definition-of-generation-type) # * [4.1.4 Definition of generation technology types](#4.1.4-Definition-of-generation-technology-types) # * [4.1.5 Definition of energy sources](#4.1.5-Definition-of-energy-sources) # * [4.1.6 Additional information on geographic coordinates and EIC codes](#4.1.6-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.2 The Netherlands NL](#4.2-The-Netherlands-NL) # * [4.2.1 Data import and merger](#4.2.1-Data-import-and-merger) # * [4.2.2 Translation and harmonization of columns](#4.2.2-Translation-and-harmonization-of-columns) # * [4.2.3 Definition of energy sources](#4.2.3-Definition-of-energy-sources) # * [4.2.4 Adjustment of capacity for plant Rijnmond II (version '2016-10-27')](#4.2.4-Adjustment-of-capacity-for-plant-Rijnmond-II-%28version-'2016-10-27'%29) # * [4.2.5 Select daily entry with highest available capacity](#4.2.5-Select-daily-entry-with-highest-available-capacity) # * [4.2.6 Additional information on geographic coordinates and EIC codes](#4.2.6-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.3 France FR](#4.3-France-FR) # * [4.3.1 Data 
import](#4.3.1-Data-import) # * [4.3.2 Translation and harmonization of columns](#4.3.2-Translation-and-harmonization-of-columns) # * [4.3.3 Treatment of duplicate data entries](#4.3.3-Treatment-of-duplicate-data-entries) # * [4.3.4 Definition of energy sources and generation of technology types](#4.3.4-Definition-of-energy-sources-and-generation-of-technology-types) # * [4.3.5 Additional information on geographic coordinates and EIC codes](#4.3.5-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.4 Poland PL](#4.4-Poland-PL) # * [4.4.1 Data import](#4.4.1-Data-import) # * [4.4.2 Translation and harmonization of columns](#4.4.2-Translation-and-harmonization-of-columns) # * [4.4.3 Definition of energy sources](#4.4.3-Definition-of-energy-sources) # * [4.4.4 Definition of generation technology types](#4.4.4-Definition-of-generation-technology-types) # * [4.4.5 Additional information on further power plants, geographic coordinates and EIC codes](#4.4.5-Additional-information-on-further-power-plants,-geographic-coordinates-and-EIC-codes) # * [4.5 Czech Republic CZ](#4.5-Czech-Republic-CZ) # * [4.5.1 Data import](#4.5.1-Data-import) # * [4.5.2 Translation and harmonization of columns](#4.5.2-Translation-and-harmonization-of-columns) # * [4.5.3 Definition of generation technology types](#4.5.3-Definition-of-generation-technology-types) # * [4.5.4 Additional information on further power plants, geographic coordinates and EIC codes](#4.5.4-Additional-information-on-further-power-plants,-geographic-coordinates-and-EIC-codes) # * [4.6 Switzerland CH](#4.6-Switzerland-CH) # * [4.6.1 Data import](#4.6.1-Data-import) # * [4.6.2 Consolidation, translation and harmonization of columns](#4.6.2-Consolidation,-translation-and-harmonization-of-columns) # * [4.6.3 Definition of generation technology types](#4.6.3-Definition-of-generation-technology-types) # * [4.6.4 Additional information on geographic coordinates and EIC 
codes](#4.6.4-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.6.5 Merge hydro and nuclear power plant data](#4.6.5-Merge-hydro-and-nuclear-power-plant-data) # * [4.7 Italy IT](#4.7-Italy-IT) # * [4.7.1 Data import](#4.7.1-Data-import) # * [4.7.2 Translation and harmonization of columns](#4.7.2-Translation-and-harmonization-of-columns) # * [4.7.3 Definition of energy sources](#4.7.3-Definition-of-energy-sources) # * [4.7.4 Additional information on geographic coordinates and EIC codes](#4.7.4-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.8 Finland FI](#4.8-Finland-FI) # * [4.8.1 Data import](#4.8.1-Data-import) # * [4.8.2 Translation and harmonization of columns](#4.8.2-Translation-and-harmonization-of-columns) # * [4.8.3 Definition of energy sources](#4.8.3-Definition-of-energy-sources) # * [4.8.4 Definition of generation technology types](#4.8.4-Definition-of-generation-technology-types) # * [4.8.5 Definition of generation type](#4.8.5-Definition-of-generation-type) # * [4.8.6 Additional information on geographic coordinates and EIC codes](#4.8.6-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.9 Spain ES](#4.9-Spain-ES) # * [4.9.1 Data import](#4.9.1-Data-import) # * [4.9.2 Translation and harmonization of columns](#4.9.2-Translation-and-harmonization-of-columns) # * [4.9.3 Definition of energy sources](#4.9.3-Definition-of-energy-sources) # * [4.9.4 Definition of generation technology types](#4.9.4-Definition-of-generation-technology-types) # * [4.9.5 Definition of energy sources](#4.9.5-Definition-of-energy-sources) # * [4.9.6 Additional information on geographic coordinates and EIC codes](#4.9.6-Additional-information-on-geographic-coordinates-and-EIC-codes) # * [4.10 United Kingdom UK](#4.10-United-Kingdom-UK) # * [4.10.1 Data import](#4.10.1-Data-import) # * [4.10.2 Translation and harmonization of columns](#4.10.2-Translation-and-harmonization-of-columns) # * [4.10.3 Definition of 
# generation technology types](#4.10.3-Definition-of-generation-technology-types)
# * [4.10.4 Definition of energy sources](#4.10.4-Definition-of-energy-sources)
# * [4.10.5 Additional information on geographic coordinates and EIC codes](#4.10.5-Additional-information-on-geographic-coordinates-and-EIC-codes)
# * [4.11 Norway NO](#4.11-Norway-NO)
# * [4.12 Sweden SE](#4.12-Sweden-SE)
# * [4.13 Slovakia SK](#4.13-Slovakia-SK)
# * [4.14 Slovenia SI](#4.14-Slovenia-SI)
# * [4.15 Austria AT](#4.15-Austria-AT)
# * [4.16 Denmark DK](#4.16-Denmark-DK)
# * [5. Consolidation of processed country data](#5.-Consolidation-of-processed-country-data)
# * [5.1 Implementation of energy source levels](#5.1-Implementation-of-energy-source-levels)
# * [5.2 Definition of structure and data types](#5.2-Definition-of-structure-and-data-types)
# * [6. Documentation of the data package](#6.-Documentation-of-the-data-package)
# * [7. Result export](#7.-Result-export)
# * [7.1 Write results to file](#7.1-Write-results-to-file)
# * [7.2 Write checksums](#7.2-Write-checksums)

# # 1. Script setup
# Import of the Python modules needed to process the data and creation of the
# required output folders.
# **Note:** The package `pyproj` needs to be installed, as it is not part of
# the standard Anaconda installation.

# In[ ]:

# standard library
import datetime
import hashlib
import json
import logging
import os
import sqlite3
import urllib.parse
import urllib.request
import zipfile

# third-party
import numpy as np
import pandas as pd
import pyproj  # required for transforming coordinates
import yaml

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%d %b %Y %H:%M:%S'
)
logger = logging.getLogger()

# create download and output folders if they do not exist
os.makedirs('download', exist_ok=True)
os.makedirs('output', exist_ok=True)
os.makedirs(os.path.join('output', 'original_data'), exist_ok=True)

# # 2.
Settings # ## 2.1 Choose download option # The original data can either be downloaded from the original data sources as specified below or from the OPSD-Server. Default option is to download from the original sources as the aim of the project is to stay as close to original sources as possible. However, if problems with downloads e.g. due to changing URLs occur, you can still run the script with the original data from the OPSD-server. # In[ ]: # Activate or uncomment to download from original data sources download_from = 'original_sources' # Activate or uncomment to download from existing OPSD versions # download_from = 'opsd_server' # version = '2016-10-27' # ## 2.2 Definition of national data sources # The meta data of the national data sources, which is required for downloading and importing the data, are laid down in the following section. The following meta data information are required, which partly differ for each data source depending on the filetype for import. # # **CSV format** # ``` # Country: Country ISO code # Name of source: Short name of source # url_initial: Initial URL on data source webpage # url_template: Direct URL to data file # filename: Name of data file without filetype extension # filename_opsd: Name of data file for OPSD download # '2016-10-27': Name of data file in previous OPSD version # filetype: Name of filetype (e.g. csv) # sep: Column separator (e.g. ',') # skiprows: Number of first rows to be skipped during import # decimal: Decimal separator (e.g. '.') # encoding: Type of encoding (e.g. 'utf-8') # ``` # **Excel format (xls, xlsx)** # # ``` # Country: Country ISO code # Name of source: Short name of source # url_initial: Initial URL on data source webpage # url_template: Direct URL to data file # filename: Name of data file without filetype extension # filename_opsd: Name of data file for OPSD download # '2016-10-27': Name of data file in previous OPSD version # filetype: Name of filetype (e.g. 
# xlsx)
# sheetname: Name of sheet in workbook
# skiprows: Number of first rows to be skipped during import
#
# ```

# In[ ]:

# Per-country / per-source download and import meta data (see section 2.2).
conf = """
BE:
    Elia:
        url_initial: http://www.elia.be/en/grid-data/power-generation/generating-facilities
        url_template: http://publications.elia.be/upload/ProductionParkOverview.xls?TS=20120416193815
        filename: ProductionParkOverview
        filename_opsd:
            '2016-10-27': ProductionParkOverview
            '2017-03-03': ProductionParkOverview
            '2017-07-03': ProductionParkOverview
        filetype: xls
        sheetname: 'ProductionParkOverview'
        skiprows: 1
NL:
    Tennet_Q1:
        url_initial: http://www.tennet.org/english/operational_management/export_data.aspx
        url_template: http://www.tennet.org/english/operational_management/export_data.aspx?exporttype=installedcapacity&format=csv&quarter=2016-1&submit=3
        filename: export_Q12016
        filename_opsd:
            '2016-10-27': export_Q12015
            '2017-03-03': export_Q12016
            '2017-07-03': export_Q12016
        filetype: csv
        sep: ','
        skiprows: 0
        decimal: '.'
        encoding: 'utf-8'
    Tennet_Q2:
        url_initial: http://www.tennet.org/english/operational_management/export_data.aspx
        url_template: http://www.tennet.org/english/operational_management/export_data.aspx?exporttype=installedcapacity&format=csv&quarter=2016-2&submit=3
        filename: export_Q22016
        filename_opsd:
            '2016-10-27': export_Q22015
            '2017-03-03': export_Q22016
            '2017-07-03': export_Q22016
        filetype: csv
        sep: ','
        skiprows: 0
        decimal: '.'
        encoding: 'utf-8'
    Tennet_Q3:
        url_initial: http://www.tennet.org/english/operational_management/export_data.aspx
        url_template: http://www.tennet.org/english/operational_management/export_data.aspx?exporttype=installedcapacity&format=csv&quarter=2016-3&submit=3
        filename: export_Q32016
        filename_opsd:
            '2016-10-27': export_Q32015
            '2017-03-03': export_Q32016
            '2017-07-03': export_Q32016
        filetype: csv
        sep: ','
        skiprows: 0
        decimal: '.'
        encoding: 'utf-8'
    Tennet_Q4:
        url_initial: http://www.tennet.org/english/operational_management/export_data.aspx
        url_template: http://www.tennet.org/english/operational_management/export_data.aspx?exporttype=installedcapacity&format=csv&quarter=2016-4&submit=3
        filename: export_Q42016
        filename_opsd:
            '2016-10-27': export_Q42015
            '2017-03-03': export_Q42016
            '2017-07-03': export_Q42016
        filetype: csv
        sep: ','
        skiprows: 0
        decimal: '.'
        encoding: 'utf-8'
IT:
    Terna:
        url_initial: http://www.terna.it/it-it/sistemaelettrico/transparencyreport/generation/installedgenerationcapacity.aspx
        url_template: http://download.terna.it/terna/0000/0216/16.XLSX
        filename: 16
        filename_opsd:
            '2016-10-27': 16
            '2017-03-03': 16
            '2017-07-03': 16
        filetype: xls
        sheetname: 'UPR PmaxOver 100MW'
        skiprows: 0
FR:
    RTE:
        url_initial: https://clients.rte-france.com/lang/an/visiteurs/vie/prod/parc_reference.jsp
        url_template: http://clients.rte-france.com/servlets/CodesEICServlet
        filename: Centrales_production_reference
        filename_opsd:
            '2016-10-27': Centrales_production_reference
            '2017-03-03': Centrales_production_reference
            '2017-07-03': Centrales_production_reference
        filetype: zip
        sep: '\t'
        skiprows: 2
        decimal: ','
        encoding: 'cp1252'
ES:
    SEDE:
        url_initial: https://sedeaplicaciones.minetur.gob.es/electra/BuscarDatos.aspx
        url_template: http://www6.mityc.es/aplicaciones/electra/ElectraExp.csv.zip
        filename: ElectraExp
        filename_opsd:
            '2016-10-27': ElectraExp
            '2017-03-03': ElectraExp
            '2017-07-03': ElectraExp
        filetype: zip
        sep: ';'
        skiprows: 0
        decimal: ','
        encoding: 'utf-8'
FI:
    EnergyAuthority:
        url_initial: http://www.energiavirasto.fi/en/web/energy-authority/power-plant-register
        url_template: http://www.energiavirasto.fi/documents/10191/0/Energiaviraston+voimalaitosrekisteri+06072017.xlsx
        filename: Energiaviraston+voimalaitosrekisteri+21032017
        filename_opsd:
            '2016-10-27': Energiaviraston+Voimalaitosrekisteri+040316
            '2017-03-03': Energiaviraston+Voimalaitosrekisteri+010117
            '2017-07-03': Energiaviraston+voimalaitosrekisteri+06072017
        filetype: xlsx
        sheetname: 'English'
        skiprows: 1
PL:
    GPI:
        url_initial: http://gpi.tge.pl/en/wykaz-jednostek;jsessionid=C2472043DF326CED2F9C0840B503F5B0.gpi-app1
        url_template: http://gpi.tge.pl/en/wykaz-jednostek?p_p_id=powerunits_WAR_powerunitsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_count=1
        filename: units_list
        filename_opsd:
            '2016-10-27': units_list
            '2017-03-03': units_list
            '2017-07-03': units_list
        filetype: csv
        sep: ';'
        skiprows: 0
        decimal: '.'
        encoding: 'utf-8'
UK:
    GOV:
        url_initial: https://www.gov.uk/government/statistics/electricity-chapter-5-digest-of-united-kingdom-energy-statistics-dukes
        url_template: https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/558040/DUKES_5.10.xls
        filename: DUKES_5.10
        filename_opsd:
            '2016-10-27': dukes5_10
            '2017-03-03': dukes5_10
            '2017-07-03': DUKES_5.10
        filetype: xls
        sheetname: 'Database'
        skiprows: 3
CZ:
    CEPS:
        url_initial: https://www.ceps.cz/ENG/Data/Vsechna-data/Pages/Instalovany-vykon.aspx
        url_template: http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=xlsx&from=1/1/2016%2012:00:00%20AM&to=12/31/2016%2011:59:59%20PM&hasinterval=False&sol=9&lang=ENG&ver=YF&
        filename: Data
        filename_opsd:
            '2016-10-27': Data
            '2017-03-03': Data
            '2017-07-03': Data
        filetype: xlsx
        sheetname: 'NewWorksheet'
        skiprows: 2
CH:
    BFE:
        url_initial: http://www.bfe.admin.ch/themen/00490/00491/index.html?lang=de&dossier_id=01049
        url_template: http://www.bfe.admin.ch/php/modules/publikationen/stream.php?extlang=de&name=de_618216248.zip&endung=Statistik%20der%20Wasserkraftanlagen%20der%20Schweiz
        filename: 'Statistik der Wasserkraftanlagen der Schweiz 1.1.2017'
        filename_opsd:
            '2016-10-27': 'Statistik der Wasserkraftanlagen der Schweiz 1.1.2016'
            '2017-03-03': 'Statistik der Wasserkraftanlagen der Schweiz 1.1.2016'
            '2017-07-03': 'Statistik der Wasserkraftanlagen der Schweiz 1.1.2017'
        filetype: zip
        sheetname: 'Vorschau'
        skiprows: 0
"""
# safe_load: yaml.load() without an explicit Loader is deprecated since
# PyYAML 5.1 and raises a TypeError in PyYAML 6; safe_load is sufficient
# for this plain-data configuration.
conf = yaml.safe_load(conf)

# ## 2.3 Download function
# The download function standardizes the download process of the different
# national data sources. The required input parameters are the URL, the
# filename, and the filetype. The function downloads the corresponding data
# source and saves the file in the folder `download`. Additionally, the file
# is saved in the folder `output/original_data` together with the results of
# this processing script.

# In[ ]:

def downloadandcache(url, filename, filetype):
    """Download *url* into the date-stamped cache in ``download/``.

    The file is stored as ``download/<YYYY-M-D>-<filename>.<filetype>`` and a
    copy is placed in ``output/original_data/<filename>.<filetype>``. If the
    dated cache file already exists, no download is performed.

    Returns the local filepath of the cached file (prefixed with './').
    """
    import shutil  # local import: only needed for the original-data copy

    filename = str(filename)
    now = datetime.datetime.now()
    # NOTE: month/day are not zero-padded; importdata() builds the same
    # string, so the two stay consistent.
    datestring = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
    filepath = os.path.join(
        'download', datestring + "-" + filename + "." + filetype)
    filepath_original_data = os.path.join(
        'output', 'original_data', filename + "." + filetype)

    # Check if file exists, otherwise download it
    if not os.path.exists(filepath):
        print("Downloading file", filename + "." + filetype)
        urllib.request.urlretrieve(url, filepath)
        # Copy the just-downloaded file instead of fetching the URL a second
        # time (the original code downloaded twice, which wastes a request
        # and could yield two different files).
        shutil.copyfile(filepath, filepath_original_data)
    else:
        print("Using local file from", filepath)
    filepath = './' + filepath
    return filepath

# ## 2.4 Import function
# The import function reads the downloaded data files and imports them into a
# data frame. The function uses the meta data defined in
# [2.1](#2.2-Definition-of-national-data-sources). Depending on the filetype,
# pandas-specific import functions are used to read the files.
# In[ ]:

def importdata(country, tso):
    """Import a previously downloaded national data file.

    Looks up the import parameters for ``conf[country][tso]``, opens the
    date-stamped file written by ``downloadandcache`` and reads it with the
    pandas reader matching its filetype. Two bookkeeping columns, 'country'
    and 'source', are appended.

    Returns the imported data as a pandas DataFrame.
    """
    now = datetime.datetime.now()
    # Must match the (non-zero-padded) date string used by downloadandcache.
    datestring = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
    param = conf[country][tso]
    filepath = os.path.join('download', datestring + "-" + str(
        param['filename']) + "." + param['filetype'])
    if param['filetype'] == 'csv':
        data_import = pd.read_csv(filepath,
                                  sep=param['sep'],
                                  skiprows=param['skiprows'],
                                  decimal=param['decimal'],
                                  encoding=param['encoding'],
                                  low_memory=False)
    elif param['filetype'] == 'dbf':
        # Dbf5 lives in the third-party 'simpledbf' package, which the script
        # does not import at the top. Import it lazily here so the branch
        # fails with a clear ImportError instead of a NameError, and so the
        # dependency is only required when a dbf source is actually configured.
        from simpledbf import Dbf5
        dbf = Dbf5(filepath, codec=param['encoding'])
        data_import = dbf.to_dataframe()
    else:
        # NOTE(review): 'sheetname' was renamed to 'sheet_name' in pandas
        # 0.21 and later removed -- confirm the pinned pandas version before
        # upgrading.
        data_import = pd.read_excel(filepath,
                                    sheetname=param['sheetname'],
                                    skiprows=param['skiprows'])
    data_import['country'] = str(country)
    data_import['source'] = str(tso)
    return data_import

# ## 2.5 Other functions

# In[ ]:

def get_sha_hash(path, blocksize=65536):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in chunks of *blocksize* bytes so that arbitrarily
    large files can be hashed without loading them into memory at once.
    """
    sha_hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        buffer = f.read(blocksize)
        while len(buffer) > 0:
            sha_hasher.update(buffer)
            buffer = f.read(blocksize)
        return sha_hasher.hexdigest()

# ## 2.6 Definition of harmonized output scheme
# To provide a standardized set of power plant information among all national
# data sources, a set of required columns is defined which is subsequently
# filled with the available data. The following columns and their structure
# are the basis for all national data sources.
#
# **Note:** If information for a specific column is not available, the data
# entry is left empty. On the other hand, if a national data source provides
# information beyond this scheme, that information is not processed.

# In[ ]:

columns_sorted = ['name', 'company', 'street', 'postcode', 'city', 'country',
                  'capacity', 'energy_source', 'technology', 'chp',
                  'commissioned', 'type', 'lat', 'lon', 'eic_code',
                  'additional_info', 'comment', 'source']

# # 3.
# Download of data
# The subsequent loop iterates over the entries (countries and data sources)
# of the meta data file (section
# [2.1](#2.2-Definition-of-national-data-sources)) and calls the previously
# defined download function, which is fed with the data-source-specific meta
# data. After downloading the data files, they are further handled to receive
# files which are ready for a direct import through pandas. In most cases,
# readable filetypes are provided (e.g. csv, xls, xlsx). However, the
# following exceptions need special attention:
# - **FR** (RTE) provides a compressed zip-file with a corrupted Excel-file.
#   Renaming the Excel- to a CSV-file solves the import problem.
# - **ES** (SEDE) delivers a compressed zip-file which needs to be extracted
#   before importing. Files are extracted in the folder `download`.
# - **CH** (BFE) delivers a compressed zip-file which needs to be extracted
#   before importing. Files are extracted in the folder `download`.

# In[ ]:

def _extract_zip_member(filepath, filename_data, member_ext, target_ext):
    """Extract '<filepath>.zip' into 'download' and rename the extracted
    '<filename_data>.<member_ext>' to the dated '<filepath>.<target_ext>'."""
    with zipfile.ZipFile(filepath + ".zip", "r") as archive:
        archive.extractall("download")
    if not os.path.exists(filepath + "." + target_ext):
        os.rename(os.path.join("download", filename_data + "." + member_ext),
                  filepath + "." + target_ext)


# 'sources' is the per-country dict of data sources; the original code
# re-used the name 'tso' for both the dict and its keys, shadowing it.
for country, sources in conf.items():
    for tso, param in sources.items():
        # Read and define input values for download function
        if download_from == 'original_sources':
            url_data = param['url_template']
            filename_data = param['filename']
            filetype_data = param['filetype']
        if download_from == 'opsd_server':
            url_data = ('http://data.open-power-system-data.org/'
                        'conventional_power_plants/' + version +
                        '/original_data/')
            filename_data = param['filename_opsd'][version]
            filetype_data = param['filetype']
            # The OPSD server URL is a directory; the (URL-encoded) file
            # name still has to be appended. This must NOT be done for the
            # original sources, whose url_template is already complete.
            url_data = (url_data + str(filename_data).replace(" ", "%20") +
                        '.' + str(filetype_data))
        print(url_data)

        # Call of download function
        downloadandcache(url_data, filename_data, filetype_data)

        # Treatment of special cases for FR, ES, and CH
        now = datetime.datetime.now()
        datestring = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
        filepath = os.path.join(
            "download", datestring + "-" + str(filename_data))

        # Special case FR (RTE): ZIP-file with a corrupted xls-file is
        # provided, which needs to be renamed to csv
        if country == 'FR':
            _extract_zip_member(filepath, filename_data, "xls", "csv")
            # change filetype from zip to csv
            conf[country][tso]['filetype'] = "csv"

        # Special case ES (SEDE): ZIP-file with csv file
        if country == 'ES':
            _extract_zip_member(filepath, filename_data, "csv", "csv")
            # change filetype from zip to csv
            conf[country][tso]['filetype'] = "csv"

        # Special case CH (BFE): ZIP-file with xlsx file
        if country == 'CH':
            _extract_zip_member(filepath, filename_data, "xlsx", "xlsx")
            # change filetype from zip to xlsx
            # (the original comment wrongly said "zip to csv" here)
            conf[country][tso]['filetype'] = "xlsx"

# print(conf)

# # 4. Data processing per country
# ## 4.1 Belgium BE
# ### 4.1.1 Data import
# The data is provided by the Belgian transmission network operator
# [**ELIA**](http://publications.elia.be/upload/ProductionParkOverview.xls?TS=20120416193815).
# It encompasses a detailed list of Belgian generation units with
# comprehensive information on technologies and energy fuels.
# In[ ]: data_BE = importdata('BE', 'Elia') data_BE.head() # ### 4.1.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. # In[ ]: # Translate columns dict_columns_BE = {'ARP': 'company', 'Generation plant': 'name', 'Plant Type': 'technology', 'Technical Nominal Power (MW)': 'capacity', 'Remarks': 'comment', 'Fuel': 'energy_source', 'Country': 'country', 'Source': 'source'} data_BE.rename(columns=dict_columns_BE, inplace=True) # Apply general template of columns data_BE = data_BE.reindex(columns=columns_sorted) # Drop rows without capacity entries, so that row with # "Unit connected to Distribution Grid" is dropped data_BE = data_BE.dropna(subset=['capacity']) # Adjust types of entries in all columns data_BE.capacity = data_BE.capacity.astype(float) data_BE.head() # ### 4.1.3 Definition of generation type # The generation type provides information on the 'usage' of the power plants (beside electricity generaiton), thus if the plant is an industrial power plant or provides thermal heat for district heating. # # The Belgian data source provides only a general information on the heat supply (here: WKK). Thus, due to these general informaiton, we classify corresponding plants as both, industrial or combined heat power plant, and cannot distringuish both types. 
# In[ ]:

# Generate entries in column "type" according to technology "WKK"
data_BE.loc[data_BE['technology'] == 'WKK', 'type'] = 'CHP/IPP'
# na=False: rows with a missing name would otherwise produce NA in the mask,
# which cannot be used for boolean indexing
data_BE.loc[data_BE['name'].str.contains('WKK', na=False),
            'type'] = 'CHP/IPP'

# Generate entries in column "chp" according to column "type"
data_BE.loc[(data_BE['type'] == 'CHP') |
            (data_BE['type'] == 'IPP') |
            (data_BE['type'] == 'CHP/IPP'), 'chp'] = 'Yes'

# ### 4.1.4 Definition of generation technology types
# Overall translation of all technology types mentioned in the column
# "technology" and subsequent translation check.

# In[ ]:

# Translate technologies ('NaN' marks codes without a harmonized technology;
# it is converted to a real NaN right below)
dict_technology_BE = {'GT': 'Gas turbine',
                      'BG': 'NaN',
                      'CL': 'Steam turbine',
                      'WKK': 'NaN',
                      'CCGT': 'Combined cycle',
                      'D': 'NaN',
                      'HU': 'NaN',
                      'IS': 'NaN',
                      'NU': 'Steam turbine',
                      'TJ': 'Gas turbine',
                      'WT': 'NaN',
                      ' ': 'NaN'}
data_BE["technology"].replace(dict_technology_BE, inplace=True)
data_BE["technology"].replace('NaN', np.nan, inplace=True)

# Derive the technology from the unit name where it is still undefined.
# A match on ' ST' also covers ' ST ' (same for ' GT'), so a single
# contains() test suffices; na=False keeps missing names out of the mask.
# After the replace() above no 'NaN' strings remain, so only isnull()
# needs to be tested.
data_BE.loc[data_BE['name'].str.contains(' ST', na=False) &
            data_BE['technology'].isnull(), 'technology'] = 'Steam turbine'
data_BE.loc[data_BE['name'].str.contains(' GT', na=False) &
            data_BE['technology'].isnull(), 'technology'] = 'Gas turbine'

# Check if all technologies have been translated.
# pd.notnull() is used because str(np.nan) is 'nan', not 'NaN' -- the
# original comparison therefore logged a spurious error for missing values.
for technology in data_BE["technology"].unique():
    if pd.notnull(technology) and \
            technology not in dict_technology_BE.values():
        logger.error("Untranslated technology: " + str(technology))
data_BE.head()

# ### 4.1.5 Definition of energy sources
# Overall translation of all energy source types mentioned in the column
# "energy_source" and subsequent translation check. Deletion of rows
# containing "wind" as energy source.
# In[ ]: # Translate energy sources dict_energysources_BE = {'BIO': 'Biomass and biogas', 'BF': 'Other fossil fuels', 'CL': 'Lignite', 'CP': 'Hard coal', 'CG': 'Other fossil fuels', 'GO': 'Oil', 'LF': 'Oil', 'LV': 'Oil', 'CP/BF': 'Mixed fossil fuels', 'CP/CG': 'Mixed fossil fuels', 'FA/BF': 'Mixed fossil fuels', 'NG/BF': 'Mixed fossil fuels', 'NG': 'Natural gas', 'NU': 'Nuclear', 'WR': 'Non-renewable waste', 'WA': 'Hydro', 'WI': 'Wind', 'WP': 'Biomass and biogas'} data_BE["energy_source"].replace(dict_energysources_BE, inplace=True) data_BE["energy_source"].replace('NaN', np.nan, inplace=True) # Check if all energy sources have been translated for energysource in data_BE["energy_source"].unique(): if (energysource not in dict_energysources_BE.values()) & (str(energysource) != "NaN"): logger.error("Untranslated energy source: " + str(energysource)) # Delete unwanted energy source data_BE = data_BE[data_BE.energy_source != 'Wind'] # ### 4.1.6 Additional information on geographic coordinates and EIC codes # In this section a manually compiled list is used to define the geographic coordinates of indivdual power plants. # In[ ]: # Access the second list with EIC-Codes and geographic coordinates filepath = os.path.join('input', 'input_plant_locations_BE.csv') additional_data_BE = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Initiate matching data_BE = data_BE.merge(additional_data_BE, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop columns after merger colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_BE = data_BE.drop(colsToDrop, axis=1) data_BE.head() # ## 4.2 The Netherlands NL # ### 4.2.1 Data import and merger # The data is provided by the Dutch transmission network operator [**TenneT**](http://www.tennet.org/english/operational_management/export_data.aspx). It encompasses the daily available generation capacity, thus a list of Dutch generation units being operational on a specific day. 
# The data is downloaded for all four quarters.
# NOTE(review): the configured TenneT files refer to 2016 although this text
# originally said 2015 -- confirm which year is intended.

# **Import of quarterly data**

# In[ ]:

data_NL_Q1 = importdata('NL', 'Tennet_Q1')
data_NL_Q2 = importdata('NL', 'Tennet_Q2')
data_NL_Q3 = importdata('NL', 'Tennet_Q3')
data_NL_Q4 = importdata('NL', 'Tennet_Q4')

# **Merge quarterly data**

# In[ ]:

dataframes = [data_NL_Q1, data_NL_Q2, data_NL_Q3, data_NL_Q4]
data_NL = pd.concat(dataframes)

# ### 4.2.2 Translation and harmonization of columns
# The imported data is standardized with respect to the columns as defined in
# section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step,
# existing and output-relevant columns are translated; remaining columns are
# deleted in a second step. Columns which do not exist in the data set, but
# are required for the output, are additionally added in this process.

# In[ ]:

# Merge columns "street" and "Number" to one column called "street"
# NOTE(review): missing values become the string 'nan' in the concatenation
# -- confirm whether that is acceptable downstream
data_NL['street'] = data_NL[['street', 'Number']].apply(
    lambda x: '{} {}'.format(x[0], x[1]), axis=1)

# Drop columns not needed anymore
colsToDrop = ['Location', 'Date', 'Number']
data_NL = data_NL.drop(colsToDrop, axis=1)

# Rename columns (TenneT headers -> harmonized scheme)
dict_columns_NL = {'Connected body': 'company',
                   'Entity': 'name',
                   'Fuel': 'energy_source',
                   'Capacity': 'capacity',
                   'zipcode': 'postcode',
                   'place-name': 'city'}
data_NL.rename(columns=dict_columns_NL, inplace=True)

# Adjust types of entries in all columns
data_NL.capacity = data_NL.capacity.astype(float)

# ### 4.2.3 Definition of energy sources
# Overall translation of all energy source types mentioned in the column
# "energy_source" and subsequent translation check. Generation of entries for
# the column "technology" according to information given in the column
# "energy_source" by
# [TenneT](http://www.tennet.org/english/operational_management/system_data_preparation/Reported_production_capacity/Installed_capacity.aspx).
# In[ ]: # Rename types of energy sources dict_energysources_NL = {'E01': 'Solar', 'E02': 'Wind', 'E03': 'Hydro', 'E04': 'Biomass and biogas', 'E05': 'Hard coal', 'E06': 'Natural gas', 'E07': 'Oil', 'E08': 'Nuclear', 'E09': 'Other or unspecified energy sources'} data_NL["energy_source"].replace(dict_energysources_NL, inplace=True) # Check if all energy sources have been translated for energysource in data_NL["energy_source"].unique(): if (energysource not in dict_energysources_NL.values()) & (str(energysource) != "NaN"): logger.error("Not renamed energy source: " + str(energysource)) # Generate technology entry according to energy source data_NL.loc[data_NL['energy_source'] == 'Nuclear', 'technology'] = 'Steam turbine' data_NL.loc[data_NL['energy_source'] == 'Hard coal', 'technology'] = 'Steam turbine' # Delete unwanted energy sources in column "energy_source" data_NL = data_NL[data_NL.energy_source != 'Solar'] data_NL = data_NL[data_NL.energy_source != 'Wind'] # ### 4.2.4 Adjustment of capacity for plant Rijnmond II (version '2016-10-27') # Adjustment of the capacity entry for the row relating to the power plant named "Rijnmond II". # In[ ]: # Show data entries for Rijnmond II # data_NL.loc[data_NL['name'] == 'Rijnmond II','capacity'] # In[ ]: # Data for power plant 'Rijnmond II' are daily total capacity if download_from == 'opsd_server': if version == '2016-10-27': data_NL['capacity_new'] = (data_NL['capacity']/24).where( data_NL.name == 'Rijnmond II') data_NL.loc[data_NL[ 'name'] == 'Rijnmond II', 'capacity'] = data_NL.loc[data_NL[ 'name'] == 'Rijnmond II', 'capacity_new'] data_NL = data_NL.drop(['capacity_new'], axis=1) # ### 4.2.5 Select daily entry with highest available capacity # We estimate the installed capacity by the highest available daily capacity for each unit. 
# In[ ]: # Filter rows by considering "name" and maximum "capacity data_NL = data_NL.sort_values( 'capacity', ascending=False).groupby('name', as_index=False).first() # Apply general template of columns data_NL = data_NL.reindex(columns=columns_sorted) # ### 4.2.6 Additional information on geographic coordinates and EIC codes # In this section a manually compiled list is used to define the geographic coordinates of indivdual power plants. # In[ ]: # Access the second list with EIC-Codes and geographic coordinates filepath = os.path.join('input', 'input_plant_locations_NL.csv') additional_data_NL = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Initiate matching data_NL = data_NL.merge(additional_data_NL, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop columns after merger colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_NL = data_NL.drop(colsToDrop, axis=1) data_NL.head() # ## 4.3 France FR # ### 4.3.1 Data import # The data is provided by the French transmission network operator [**RTE**](http://clients.rte-france.com/servlets/CodesEICServlet). It encompasses a detailed list of French generation units with a capacity of more than 100 MW. # In[ ]: data_FR = importdata('FR', 'RTE') # ### 4.3.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. 
# In[ ]:

# Translate columns (original French headers -> harmonized scheme)
dict_columns_FR = {'Type': 'energy_source',
                   'Nom de la centrale de production': 'name',
                   'Capacité de production Installée (MW)': 'capacity',
                   'Date de création': 'commissioned',
                   'Country': 'country',
                   'Source': 'source'}

data_FR.rename(columns=dict_columns_FR, inplace=True)

# Apply general template of columns
data_FR = data_FR.reindex(columns=columns_sorted)

# Delete unwanted row by referring to column "Name"
data_FR = data_FR.dropna(subset=['name'])

# Delete place holder datetime (RTE uses 01/01/2000 for unknown dates)
data_FR["commissioned"].replace('01/01/2000', np.nan, inplace=True)

# Define commissioning year
data_FR['commissioned'] = pd.to_datetime(
    data_FR['commissioned'], format='%d/%m/%Y')
data_FR['commissioned'] = pd.DatetimeIndex(
    data_FR['commissioned']).year

# Adjust types of entries in all columns
data_FR.capacity = data_FR.capacity.astype(float)

# ### 4.3.3 Treatment of duplicate data entries

# The list comprises duplicate entries for specific power plants. If they are commissioned, decommissioned, or retrofitted, the power plant is listed twice with differences in the installed capacity and the commissioning years. In the following, the first data entry is chosen as it seems to be the most recent one.
#
# **Note**: The commissioning or decommissioning year is currently not adjusted. A comment is set in the corresponding column 'comment'.

# In[ ]:

# Show duplicate data entries
data_FR[data_FR.duplicated(subset='name', keep=False)]

# In[ ]:

# Set comment for duplicate entries
data_FR.loc[data_FR.duplicated(
    subset='name', keep=False), 'commissioned'] = np.nan
data_FR.loc[data_FR.duplicated(
    subset='name', keep=False),
    'comment'] = 'Commissioning year not reported due to duplicate entries'

# Drop last duplicate
data_FR = data_FR.drop_duplicates(subset='name', keep='first')

# ### 4.3.4 Definition of energy sources and generation of technology types

# Generation of entries for technologies. Overall translation of all energy sources types mentioned in the column "energy_sources" and subsequent translation check.

# In[ ]:

# Generate technology entries according to energy sources
data_FR.loc[data_FR['energy_source'] == 'Hydraulique STEP',
            'technology'] = 'Pumped storage'
data_FR.loc[data_FR['energy_source'] == '''Hydraulique fil de l'eau / éclusée''',
            'technology'] = 'Run-of-river'
data_FR.loc[data_FR['energy_source'] == 'Hydraulique lacs',
            'technology'] = 'Reservoir'
data_FR.loc[data_FR['energy_source'] == 'Nucléaire',
            'technology'] = 'Steam turbine'
data_FR.loc[data_FR['energy_source'] == 'Charbon',
            'technology'] = 'Steam turbine'

# Translate types of energy sources (French -> harmonized names)
dict_energysources_FR = {'Autre': 'Other or unspecified energy sources',
                         'Charbon': 'Hard coal',
                         'Fioul': 'Oil',
                         'Gaz': 'Natural gas',
                         'Hydraulique STEP': 'Hydro',
                         '''Hydraulique fil de l'eau / éclusée''': 'Hydro',
                         'Hydraulique lacs': 'Hydro',
                         'Marin': 'Marine',
                         'Nucléaire': 'Nuclear',
                         'Biomasse': 'Biomass and biogas'}

data_FR["energy_source"].replace(dict_energysources_FR, inplace=True)

# Check if all energy sources have been translated.
# Fixed: the previous check compared str(x) against "NaN", which a float NaN
# never matches (its str() is "nan"), so missing values were spuriously
# reported; pd.notnull handles NaN correctly.
for energysource in data_FR["energy_source"].unique():
    if energysource not in dict_energysources_FR.values() and pd.notnull(energysource):
        logger.error("Untranslated energy source: " + str(energysource))

# Delete unwanted energy sources in column "energy_source"
# (renewables are covered by the separate OPSD renewables data package)
data_FR = data_FR[data_FR.energy_source != 'Wind']
data_FR = data_FR[data_FR.energy_source != 'Solar']
data_FR = data_FR[data_FR.energy_source != 'Marine']

# ### 4.3.5 Additional information on geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic coordinates of individual power plants.
# In[ ]: # Access the second list with EIC-Codes and geographic coordinates filepath = os.path.join('input', 'input_plant_locations_FR.csv') additional_data_FR = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Initiate matching data_FR = data_FR.merge(additional_data_FR, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop columns after merger colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_FR = data_FR.drop(colsToDrop, axis=1) data_FR.head() # ## 4.4 Poland PL # ### 4.4.1 Data import # The data is provided by the Polish Power Exchange [**GPI**](http://gpi.tge.pl/en/wykaz-jednostek?p_p_id=powerunits_WAR_powerunitsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_count=1 # ). It encompasses a detailed list of large Polish generation units with information on the single power plant blocks. # In[ ]: data_PL = importdata('PL', 'GPI') # ### 4.4.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. 
# In[ ]:

# Rename first column (it carries the company name but is unnamed in the
# source sheet).
# Fixed: the previous `data_PL.columns.values[0] = 'company'` wrote into the
# Index's backing NumPy array in place; pandas treats an Index as immutable
# and newer versions do not reliably reflect such writes. Rebuilding the
# column list and reassigning is the supported way.
new_cols_PL = list(data_PL.columns)
new_cols_PL[0] = 'company'
data_PL.columns = new_cols_PL

# Rename columns
dict_columns_PL = {'Generating unit name': 'name',
                   'Comments': 'comment',
                   'Available capacity [MW]': 'capacity',
                   'Basic fuel': 'energy_source',
                   'Country': 'country',
                   'Source': 'source'}

data_PL.rename(columns=dict_columns_PL, inplace=True)

# Fill columns "energy_source" and "company" with the belonging entries
# (the source sheet only states them on the first row of each group)
cols = ['energy_source', 'company']
data_PL[cols] = data_PL[cols].ffill()

# Delete empty and therefore unwanted rows by referring to column "Generating unit code"
data_PL = data_PL.dropna(subset=['Generating unit code'])

# Apply general template of columns
data_PL = data_PL.reindex(columns=columns_sorted)

# Adjust types of entries in all columns
data_PL.capacity = data_PL.capacity.astype(float)

# ### 4.4.3 Definition of energy sources

# Overall translation of all energy sources types mentioned in the column "energy_sources" and subsequent translation check.

# In[ ]:

# Rename energy sources types
dict_energysources_PL = {'Brown coal': 'Lignite',
                         'Black coal': 'Hard coal',
                         'Water': 'Hydro'}

data_PL["energy_source"].replace(dict_energysources_PL, inplace=True)

# Check if all energy sources have been translated.
# Fixed: compare against real NaN via pd.notnull instead of the string "NaN",
# which float NaN (str() == "nan") never matched.
for energysource in data_PL["energy_source"].unique():
    if energysource not in dict_energysources_PL.values() and pd.notnull(energysource):
        logger.error("Not renamed energy source: " + str(energysource))

# ### 4.4.4 Definition of generation technology types

# Generation of entries for the column "technology" according to information given in the column "energy_source".

# In[ ]:

# Generate entries in column "technology" according to energy source "hydro"
# (the listed Polish hydro units are pumped-storage plants)
data_PL.loc[data_PL['energy_source'] == 'Hydro',
            'technology'] = 'Pumped storage'

# ### 4.4.5 Additional information on further power plants, geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic coordinates of individual power plants.
# In[ ]: # Access the second list filepath = os.path.join('input', 'input_plant_locations_PL.csv') additional_data_PL = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Initiate merger data_PL = data_PL.merge(additional_data_PL, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop columns after merger colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_PL = data_PL.drop(colsToDrop, axis=1) data_PL.head() # ## 4.5 Czech Republic CZ # ### 4.5.1 Data import # The data is provided by the Czech transmission network operator [**CEPS**](http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=xlsx&from=1/1/2010%2012:00:00%20AM&to=12/31/2015%2011:59:59%20PM&hasinterval=False&sol=9&lang=ENG&ver=YF& # ). It encompasses the daily available capacity reported by the transmission system operator. # In[ ]: data_CZ = importdata('CZ', 'CEPS') # ### 4.5.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. 
# In[ ]:

# Merge columns "Power plant" and "Generating unit" to one column called "Name"
# NOTE(review): positional access (x[0]) on a Series row is deprecated in
# newer pandas; x.iloc[0] would be the future-proof spelling.
data_CZ['name'] = data_CZ[['Power plant', 'Generating unit']].apply(
    lambda x: '{} {}'.format(x[0], x[1]), axis=1)

# Rename columns
dict_columns_CZ = {'Available capacity [MW]': 'capacity',
                   'Type of source': 'technology',
                   'Country': 'country',
                   'Source': 'source'}

data_CZ.rename(columns=dict_columns_CZ, inplace=True)

# Filter rows by considering "name" and maximum "capacity": the highest
# reported daily available capacity is used as the installed capacity.
data_CZ = data_CZ.sort_values(
    'capacity', ascending=False).groupby('name', as_index=False).first()

# Apply general template of columns
data_CZ = data_CZ.reindex(columns=columns_sorted)

# Adjust types of entries in all columns
data_CZ.capacity = data_CZ.capacity.astype(float)

# ### 4.5.3 Definition of generation technology types

# Overall translation of all technology types mentioned in the column "technology" and subsequent translation check.

# In[ ]:

# Generate entries in column "energy_source" according to column "technology"
# (done before translating the Czech technology names below)
data_CZ.loc[data_CZ['technology'] == 'Jaderná elektrárna',
            'energy_source'] = 'Nuclear'
data_CZ.loc[data_CZ['technology'] == 'Přečerpávací vodní elektrárna',
            'energy_source'] = 'Hydro'
data_CZ.loc[data_CZ['technology'] == 'Parní elektrárna',
            'energy_source'] = 'Other or unspecified energy sources'
data_CZ.loc[data_CZ['technology'] == 'Paroplynová elektrárna',
            'energy_source'] = 'Other or unspecified energy sources'

# Translate technologies (Czech -> English)
dict_technologies_CZ = {'Přečerpávací vodní elektrárna': 'Pumped storage',
                        'Parní elektrárna': 'Steam turbine',
                        'Jaderná elektrárna': 'Steam turbine',
                        'Paroplynová elektrárna': 'Combined cycle'}

data_CZ["technology"].replace(dict_technologies_CZ, inplace=True)

# Check if all technologies have been translated
# NOTE(review): str(x) != "NaN" never matches a real float NaN (its str() is
# "nan"), so missing values are reported as errors here - consider pd.notnull.
for technology in data_CZ["technology"].unique():
    if (technology not in dict_technologies_CZ.values()) & (str(technology) != "NaN"):
        logger.error("Untranslated technology: " + str(technology))

# ### 4.5.4 Additional information on further power plants, geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic coordinates of individual power plants.

# In[ ]:

# Access the second list with EIC-Codes and geographic coordinates
filepath = os.path.join('input', 'input_plant_locations_CZ.csv')
additional_data_CZ = pd.read_csv(
    filepath,
    encoding='utf-8',
    header=0,
    index_col=None)

# Initiate merger (left join keeps every plant even without a location entry)
data_CZ = data_CZ.merge(additional_data_CZ,
                        left_on='name',
                        right_on='name',
                        how='left',
                        suffixes=('_x', ''))

# Drop columns after merger (the '_x' columns hold the pre-merge values)
colsToDrop = ['eic_code_x', 'lat_x', 'lon_x']
data_CZ = data_CZ.drop(colsToDrop, axis=1)

data_CZ.head()

# ## 4.6 Switzerland CH
# ### 4.6.1 Data import

# The data is provided by the Swiss Ministry of Energy [**BFE**](http://www.bfe.admin.ch/php/modules/publikationen/stream.php?extlang=de&name=de_416798061.zip&endung=Statistik%20der%20Wasserkraftanlagen%20der%20Schweiz
# ). It encompasses a detailed list of Swiss hydro generation units with comprehensive information on technical specifications.
# **Hydro power**

# In[ ]:

data_CH = importdata('CH', 'BFE')

# **Nuclear power plants**

# In[ ]:

filepath = os.path.join('input', 'input_plant-list_CH_conventional.csv')
data_nuclear_CH = pd.read_csv(
    filepath,
    encoding='utf-8',
    header=0,
    index_col=None)

# ### 4.6.2 Consolidation, translation and harmonization of columns

# **Consolidation of columns**

# In[ ]:

# Merge columns "ZE-Erste Inbetriebnahme" and "ZE-Letzte Inbetriebnahme" to
# one column called "Commissioned" (the later of the two dates is kept)
data_CH['commissioned'] = data_CH[
    ['ZE-Erste Inbetriebnahme', 'ZE-Letzte Inbetriebnahme']].apply(
        lambda x: max(x[0], x[1]), axis=1)

# Merge columns "Bemerkung (1) - (10)" to one column "Comment"
data_CH['comment'] = data_CH[['Bemerkung (1)',
                              'Bemerkung (2)',
                              'Bemerkung (3)',
                              'Bemerkung (4)',
                              'Bemerkung (5)',
                              'Bemerkung (6)',
                              'Bemerkung (7)',
                              'Bemerkung (8)',
                              'Bemerkung (9)',
                              'Bemerkung (10)']].apply(
    lambda x: '{}; {}; {}; {}; {}; {}; {}; {}; {}; {}'.format(
        x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9]), axis=1)

# Strip the string representations of missing remarks left by format()
data_CH['comment'] = data_CH['comment'].str.replace('nan; ', '')
data_CH['comment'] = data_CH['comment'].str.replace('nan', '')

# **Transformation of geographical hydro power data to WGS84 projection**

# In[ ]:

# Fixed: the "+init=EPSG:xxxx" Proj syntax and the module-level
# pyproj.transform() are deprecated since pyproj 2.2 and removed in later
# releases; the Transformer API is the supported replacement.
# CH1903 (EPSG:21781) is the projection used in the BFE data; WGS84
# (EPSG:4326) is the datum used by GPS units and Google Earth.
# always_xy=True keeps the traditional (easting, northing) -> (lon, lat)
# axis order that the previous code relied on.
transformer_CH = pyproj.Transformer.from_crs(
    "EPSG:21781", "EPSG:4326", always_xy=True)

data_CH[['lon', 'lat']] = data_CH[
    ['ZE-Koordinaten unscharf (Nord)', 'ZE-Koordinaten unscharf (Ost)']].apply(
        lambda row: transformer_CH.transform(row[1], row[0]),
        axis=1).apply(pd.Series)

# **Translation and harmonization of columns**

# The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which do not exist in the data set, but are required for the output, are additionally added in this process.

# In[ ]:

# Translate columns (German BFE headers -> harmonized scheme)
dict_columns_CH = {'WKA-Name': 'name',
                   'ZE-Standort': 'city',
                   'WKA-Typ': 'technology',
                   'ZE-Status': 'availability',
                   'Inst. Turbinenleistung': 'capacity'}

data_CH.rename(columns=dict_columns_CH, inplace=True)

# Adjust type of entries in column "Capacity"
data_CH.capacity = data_CH.capacity.astype(float)

# Adjust availabilities ('1' = in normal operation,
# '0' = under construction / being rebuilt / decommissioned)
dict_availabilities_CH = {'im Normalbetrieb': '1',
                          'im Bau': '0',
                          'im Umbau': '0',
                          'stillgelegt': '0'}

data_CH["availability"].replace(dict_availabilities_CH, inplace=True)

# List only operating plants
data_CH = data_CH[data_CH.availability != '0']

# Apply general template of columns
data_CH = data_CH.reindex(columns=columns_sorted)

# ### 4.6.3 Definition of generation technology types

# In[ ]:

# Set energy source to "hydro" (the BFE list covers hydro plants only)
data_CH['energy_source'] = 'Hydro'

# Adjust technologies (BFE single-letter type codes)
dict_technologies_CH = {'L': 'Run-of-river',
                        'S': 'Reservoir',
                        'P': 'Pumped storage with natural inflow',
                        'U': 'Pumped storage'}

data_CH["technology"].replace(dict_technologies_CH, inplace=True)

# ### 4.6.4 Additional information on geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic coordinates of individual hydro power plants.
# In[ ]:

# Access the second list with EIC-Codes and geographic coordinates
filepath = os.path.join('input', 'input_plant_locations_CH.csv')
additional_data_CH = pd.read_csv(
    filepath,
    encoding='utf-8',
    header=0,
    index_col=None)

# Initiate merger (left join keeps every plant even without a location entry)
data_CH = data_CH.merge(additional_data_CH,
                        left_on='name',
                        right_on='name',
                        how='left',
                        suffixes=('_x', ''))

# Drop columns after merger (the '_x' columns hold the pre-merge values)
colsToDrop = ['eic_code_x', 'lat_x', 'lon_x']
data_CH = data_CH.drop(colsToDrop, axis=1)

# ### 4.6.5 Merge hydro and nuclear power plant data

# In[ ]:

# Fixed: DataFrame.append() was removed in pandas 2.0; pd.concat is the
# drop-in replacement (same row-wise stacking, original indices kept).
data_CH = pd.concat([data_CH, data_nuclear_CH])

data_CH.head()

# ## 4.7 Italy IT
# ### 4.7.1 Data import

# The data is provided by the Italian transmission network operator [**TERNA**](http://download.terna.it/terna/0000/0216/16.XLSX). It encompasses a detailed list of Italian generation units of more than 100 MW.

# In[ ]:

data_IT = importdata('IT', 'Terna')

# ### 4.7.2 Translation and harmonization of columns

# The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which do not exist in the data set, but are required for the output, are additionally added in this process.
# In[ ]: # Translate columns dict_columns_IT = {'Descrizione Impianto': 'name', 'TIPOLOGIA': 'energy_source', 'Comune': 'city', 'PMAX [MW]': 'capacity', 'Country': 'country', 'Source': 'source', 'Zona': 'additional_info'} data_IT.rename(columns=dict_columns_IT, inplace=True) # Apply general template of columns data_IT = data_IT.reindex(columns=columns_sorted) # Consider of geographical information in column "additional_info" data_IT['additional_info'] = data_IT[['additional_info']].apply( lambda x: 'Zone: {}'.format(x[0]), axis=1) # Adjust types of entries in all columns data_IT.capacity = data_IT.capacity.astype(float) # ### 4.7.3 Definition of energy sources # Overall translation of all energy source types mentioned in the column "energy_sources" and subsequent translation check. Deletion of rows containing "wind" and "geothermal_power"as energy source. # In[ ]: # Translate types of energy sources dict_energysources_IT = {'GEOTERMICO': 'Geothermal', 'TERMOELETTRICO': 'Fossil fuels', 'IDROELETTRICO': 'Hydro', 'EOLICO': 'Wind'} data_IT["energy_source"].replace(dict_energysources_IT, inplace=True) # Check if all energy sources have been translated for energysource in data_IT["energy_source"].unique(): if (energysource not in dict_energysources_IT.values()) & (str(energysource) != "NaN"): logger.error("Untranslated energy source: " + str(energysource)) # Delete unwanted energy sources in column "energy_source" data_IT = data_IT[data_IT.energy_source != 'Wind'] data_IT = data_IT[data_IT.energy_source != 'Geothermal'] # ### 4.7.4 Additional information on geographic coordinates and EIC codes # In this section a manually compiled list is used to define the geographic coordinates of indivdual power plants. 
# In[ ]: # Access the second list with EIC-Codes and geographic coordinates filepath = os.path.join('input', 'input_plant_locations_IT.csv') additional_data_IT = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Initiate merger data_IT = data_IT.merge(additional_data_IT, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop columns after merger colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_IT = data_IT.drop(colsToDrop, axis=1) data_IT.head() # ## 4.8 Finland FI # ### 4.8.1 Data import # The data is provided by the Finnish [**Energy Authority**](http://www.energiavirasto.fi/documents/10191/0/Energiaviraston+Voimalaitosrekisteri+010117.xlsx). It encompasses a detailed list of Finnish generation units of at least one megavolt ampere [1 MVA]. # In[ ]: data_FI = importdata('FI', 'EnergyAuthority') # ### 4.8.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. 
# In[ ]: # Generate entries in column "CHP" data_FI.loc[data_FI[ 'Combined Heat and Power Production, Industry,Maximum, Total, MW'] > 0, 'chp'] = 'Yes' data_FI.loc[data_FI[ 'Combined Heat and Power Production, District Heating, Total, MW'] > 0, 'chp'] = 'Yes' # Drop columns not needed anymore colsToDrop = ['Business ID', 'Location', 'Separate power production, Maximum, Hour, MW', 'Separate power production, Decomissioned, Hour, MW', 'Combined Heat and Power Production, Industry,Maximum, Total, MW', 'Combined Heat and Power Production, Industry,Hour, Total, MW', 'Combined Heat and Power Production, Industry, Decomissioned, Total, MW', 'Combined Heat and Power Production, District Heating, Total, MW', 'Combined Heat and Power Production, District Heating, Hour, MW', 'Combined Heat and Power Production, District Heating, Decomissioned, Total, MW', 'Separate power production, Maximum, Total, MW', 'Hour, total, MW', 'Decomissioned, Total, MW', 'Standby fuel ', 'Standby fuel'] data_FI = data_FI.drop(colsToDrop, axis=1) # Rename columns dict_columns_FI = {'Name': 'name', 'Company': 'company', 'Type': 'type', 'Address': 'street', 'Town': 'city', 'Postal code': 'postcode', 'Maximum, total, MW': 'capacity', 'Main fuel': 'energy_source', 'Country': 'country', 'Source': 'source'} data_FI.rename(columns=dict_columns_FI, inplace=True) # Apply general template of columns data_FI = data_FI.reindex(columns=columns_sorted) # Adjust types of entries in all columns data_FI.capacity = data_FI.capacity.astype(float) # ### 4.8.3 Definition of energy sources # Overall translation of all energy sources types mentioned in the column "energy_sources" and subsequent translation check. Generation of entries for the column "energy_scoures" according to information given in the column "type". 
# In[ ]: # Rename types of energy sources dict_energysources_FI = {'Biogas': 'Biomass and biogas', 'Black liquor and concentrated liquors': 'Biomass and biogas', 'Blast furnace gas': 'Other fossil fuels', 'By-products from wood processing industry': 'Biomass and biogas', 'Exothermic heat from industry': 'Other or unspecified energy sources', 'Forest fuelwood': 'Biomass and biogas', 'Gasified waste': 'Non-renewable waste', 'Hard coal and anthracite': 'Hard coal', 'Heavy distillates': 'Oil', 'Industrial wood residues': 'Biomass and biogas', 'Light distillates': 'Oil', 'Medium heavy distillates': 'Oil', 'Mixed fuels': 'Mixed fossil fuels', 'Natural gas': 'Natural gas', 'Nuclear energy': 'Nuclear', 'Other by-products and wastes used as fuel': 'Other fossil fuels', 'Other non-specified energy sources': 'Other or unspecified energy sources', 'Peat': 'Biomass and biogas', ' ': 'Other or unspecified energy sources', np.nan: 'Other or unspecified energy sources'} data_FI["energy_source"].replace(dict_energysources_FI, inplace=True) data_FI["energy_source"].replace('NaN', np.nan, inplace=True) # Check if all energy sources have been translated for energysource in data_FI["energy_source"].unique(): if (energysource not in dict_energysources_FI.values()) & (str(energysource) != "NaN"): logger.error("Untranslated energy source: " + str(energysource)) # Generate entries in column "energy_sources" for hydro and wind stations according to column "type" data_FI.loc[data_FI['type'] == 'Hydro power', 'energy_source'] = 'Hydro' data_FI.loc[data_FI['type'] == 'Wind power', 'energy_source'] = 'Wind' # ### 4.8.4 Definition of generation technology types # Generation of entries for the column "technology" according to information given in the column "energy_source". Deletion of rows containing "wind" as energy source. 
# In[ ]:

# Generate entries in column "technology" according to column "energy_source"
data_FI.loc[data_FI['energy_source'] == 'Nuclear',
            'technology'] = 'Steam turbine'
data_FI.loc[data_FI['energy_source'] == 'Hard coal',
            'technology'] = 'Steam turbine'

# Delete unwanted energy source (wind) in column "energy_source"
data_FI = data_FI[data_FI.energy_source != 'Wind']

# ### 4.8.5 Definition of generation type

# Overall translation of all types mentioned in the column "type" and subsequent translation check.

# In[ ]:

# Rename types ('NaN' entries are converted to real NaN directly below)
dict_types_FI = {'District heating CHP': 'CHP',
                 'Hydro power': 'NaN',
                 'Industry CHP': 'IPP',
                 'Nuclear energy': 'NaN',
                 'Separate electricity production': 'NaN',
                 'Wind power': 'NaN'}

data_FI["type"].replace(dict_types_FI, inplace=True)
data_FI["type"].replace('NaN', np.nan, inplace=True)

# ### 4.8.6 Additional information on geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic coordinates of individual power plants.

# In[ ]:

# Access the second list with EIC-Codes and geographic coordinates
filepath = os.path.join('input', 'input_plant_locations_FI.csv')
additional_data_FI = pd.read_csv(
    filepath,
    encoding='utf-8',
    header=0,
    index_col=None)

# Initiate merger (left join keeps every plant even without a location entry)
data_FI = data_FI.merge(additional_data_FI,
                        left_on='name',
                        right_on='name',
                        how='left',
                        suffixes=('_x', ''))

# Drop columns after merger (the '_x' columns hold the pre-merge values)
colsToDrop = ['eic_code_x', 'lat_x', 'lon_x']
data_FI = data_FI.drop(colsToDrop, axis=1)

data_FI.head()

# ## 4.9 Spain ES
# ### 4.9.1 Data import

# The data is provided by the Spanish [**SEDE - Ministry of Industry, Energy and Tourism**](http://www6.mityc.es/aplicaciones/electra/ElectraExp.csv.zip). It encompasses a detailed list of Spanish generation units with comprehensive information on technologies and energy fuels.

# In[ ]:

data_ES = importdata('ES', 'SEDE')

# ### 4.9.2 Translation and harmonization of columns

# Overall adjustment of all columns within the dataframe. Translation, addition, deletion, sorting of columns as well as adjustment of the column entries' types. Adjustment of the entries' units from kW to MW in the columns "Capacity" (corresponding to the net capacity in the original data set).

# In[ ]:

# Delete unwanted regions (islands and exclaves outside the mainland grid)
data_ES = data_ES[data_ES.Autonomia != 'Ceuta']
data_ES = data_ES[data_ES.Autonomia != 'Melilla']
data_ES = data_ES[data_ES.Autonomia != 'Canarias']
data_ES = data_ES[data_ES.Autonomia != 'Baleares']

# Delete unwanted energy source
data_ES = data_ES[data_ES.Tecnologia != 'Fotovoltaica']

# Drop columns not needed anymore
colsToDrop = ['N. Orden',
              'Tipo Regimen',
              'Autonomia',
              'Provincia',
              'F. Alta',
              'F. Baja',
              'F. Alta Provicional',
              'Alta Registro']

data_ES = data_ES.drop(colsToDrop, axis=1)

# Rename columns (Spanish SEDE headers -> harmonized scheme)
dict_columns_ES = {'Titular': 'company',
                   'Nombre de la instalacion': 'name',
                   'Municipio': 'city',
                   'Tecnologia': 'technology',
                   'Comment': 'comment',
                   'Potencia Neta': 'capacity',
                   'Potencia Bruta': 'gross_capacity',
                   'Combustible': 'energy_source',
                   'F. Puesta En Servicio': 'commissioned',
                   'Country': 'country',
                   'Source': 'source'}

data_ES.rename(columns=dict_columns_ES, inplace=True)

# Check if all columns have been renamed
for columnnames in data_ES.columns:
    if columnnames not in dict_columns_ES.values():
        logger.error("Not renamed column: " + columnnames)

# Apply general template of columns
data_ES = data_ES.reindex(columns=columns_sorted)

# Change unit of column 'capacity' from kW to MW
data_ES.capacity = data_ES.capacity.astype(float)
data_ES['capacity'] = (data_ES['capacity'] / 1000)

# Define commissioning year
data_ES['commissioned'] = pd.to_datetime(
    data_ES['commissioned'], format='%d/%m/%Y')
data_ES['commissioned'] = pd.DatetimeIndex(
    data_ES['commissioned']).year

# ### 4.9.3 Definition of energy sources

# Overall translation of all energy sources types mentioned in the column "energy_sources" and subsequent translation check.
# In[ ]:

# Translate types of energy sources (Spanish SEDE terms -> harmonized names);
# the registry mixes upper- and mixed-case spellings, hence the duplicates
dict_energysources_ES = {'Biocombustibles liquidos': 'Biomass and biogas',
                         'Biogas': 'Biomass and biogas',
                         'Biogas de digestion': 'Biomass and biogas',
                         'Biogas de vertedero': 'Biomass and biogas',
                         'Biomasa industrial agricola': 'Biomass and biogas',
                         'Biomasa industrial forestal': 'Biomass and biogas',
                         'Biomasa primaria': 'Biomass and biogas',
                         'Calor residual': 'Other or unspecified energy sources',
                         'Carbon': 'Hard coal',
                         'CARBON IMPORTADO': 'Hard coal',
                         'Cultivos energeticos agricolas o forestales': 'Biomass and biogas',
                         'DIESEL': 'Oil',
                         'Energias residuales': 'Non-renewable waste',
                         'Fuel': 'Oil',
                         'FUEL-OIL 0,3': 'Oil',
                         'FUELOLEO': 'Oil',
                         'GAS DE REFINERIA': 'Natural gas',
                         'Gas natural': 'Natural gas',
                         'GAS NATURAL': 'Natural gas',
                         'Gas residual': 'Natural gas',
                         'Gasoleo': 'Oil',
                         'GASOLEO': 'Oil',
                         'HULLA+ANTRACITA': 'Hard coal',
                         'Licores negros': 'Biomass and biogas',
                         'LIGNITO NEGRO': 'Lignite',
                         'LIGNITO PARDO': 'Lignite',
                         'NUCLEAR': 'Nuclear',
                         'Propano': 'Natural gas',
                         'Residuo aprovechamiento forestal o selvicola': 'Other bioenergy and renewable waste',
                         'Residuos': 'Non-renewable waste',
                         'Residuos actividad agricolas o jardineria': 'Other bioenergy and renewable waste',
                         'Residuos industriales': 'Non-renewable waste',
                         'Residuos solidos urbanos': 'Non-renewable waste',
                         'RESIDUOS SOLIDOS URBANOS': 'Non-renewable waste',
                         ' ': 'Other or unspecified energy sources',
                         np.nan: 'Other or unspecified energy sources'}

data_ES["energy_source"].replace(dict_energysources_ES, inplace=True)
data_ES["energy_source"].replace('NaN', np.nan, inplace=True)

# Check if all energy sources have been translated
# NOTE(review): str(x) != "NaN" never matches a real float NaN (its str() is
# "nan"), so missing values are reported as errors here - consider pd.notnull.
for energysource in data_ES["energy_source"].unique():
    if (energysource not in dict_energysources_ES.values()) & (str(energysource) != "NaN"):
        logger.error("Not renamed energy source: " + str(energysource))

# ### 4.9.4 Definition of generation technology types

# Overall translation of all technology types mentioned in the column "technology" and subsequent translation check.

# In[ ]:

# Translate technologies; categories without a harmonized counterpart are
# mapped to the string 'NaN' and converted to real NaN directly below
dict_technologies_ES = {'Aprovechamiento de energias residuales': 'NaN',
                        'Biogas': 'NaN',
                        'Biomasa': 'NaN',
                        'Cogeneracion': 'NaN',
                        'Eolica terrestre': 'Onshore',
                        'Fotovoltaica': 'Photovoltaics',
                        'Hidraulica fluyente': 'Run-of-river',
                        'Hidraulica': 'Differently categorized hydro',
                        'Motor': 'Combustion engine',
                        'Otras': 'NaN',
                        'Termica': 'NaN',
                        'Termica clasica': 'NaN',
                        'Termonuclear': 'Steam turbine',
                        'Tratamiento de residuos': 'NaN',
                        'Turbina': 'NaN',
                        'Turbina de gas': 'Gas turbine',
                        'Residuos': 'NaN',
                        'Solar Termoelectrica': 'Concentrated solar power',
                        ' ': 'NaN',
                        np.nan: 'NaN'}

data_ES["technology"].replace(dict_technologies_ES, inplace=True)
data_ES["technology"].replace('NaN', np.nan, inplace=True)

# Check if all technologies have been translated
# NOTE(review): str(x) != "NaN" never matches a real float NaN (its str() is
# "nan"), so missing values are reported as errors here - consider pd.notnull.
for technology in data_ES["technology"].unique():
    if (technology not in dict_technologies_ES.values()) & (str(technology) != "NaN"):
        logger.error("Untranslated technology: " + str(technology))

# ### 4.9.5 Definition of energy sources

# Generation of entries for the column "energy_source" according to information given in the column "technology".
# In[ ]: # Generate entries in column "energy_source" according to technologies data_ES.loc[data_ES['technology'] == 'Differently categorized hydro', 'energy_source'] = 'Hydro' data_ES["technology"].replace('Differently categorized hydro', np.nan, inplace=True) data_ES.loc[data_ES['technology'] == 'Run-of-river', 'energy_source'] = 'Hydro' data_ES.loc[data_ES['technology'] == 'Onshore', 'energy_source'] = 'Wind' data_ES.loc[data_ES['technology'] == 'Photovoltaics', 'energy_source'] = 'Solar' data_ES.loc[data_ES['technology'] == 'Concentrated solar power', 'energy_source'] = 'Solar' # Delete unwanted energy sources data_ES = data_ES[data_ES.energy_source != 'Wind'] data_ES = data_ES[data_ES.energy_source != 'Solar'] # ### 4.9.6 Additional information on geographic coordinates and EIC codes # In this section a manually compiled list is used to define the geographic coordinates of indivdual power plants. **Please note**: The naming of power plants in the Spanish dataset is not unique creating duplicated entries during merge with geographical data. # In[ ]: # Access the second list with EIC-Codes and geographic coordinates filepath = os.path.join('input', 'input_plant_locations_ES.csv') additional_data_ES = pd.read_csv( filepath, encoding='utf-8', header=0, index_col=None) # Merge plant data with manually compiled geo-coordinates (currently duplicate entries due to merge based on name) data_ES = data_ES.merge(additional_data_ES, left_on='name', right_on='name', how='left', suffixes=('_x', '')) # Drop duplicate entries data_ES = data_ES.drop_duplicates(subset=['company', 'name', 'city'], keep='first') # Drop columns after merge colsToDrop = ['eic_code_x', 'lat_x', 'lon_x'] data_ES = data_ES.drop(colsToDrop, axis=1) data_ES.head() # ## 4.10 United Kingdom UK # ### 4.10.1 Data import # The data is provided by the British government's [**Statistical Office**](https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/446457/dukes5_10.xls). 
It encompasses a detailed list of British generation units with comprehensive information on technologies and energy fuels. # In[ ]: data_UK = importdata('UK', 'GOV') # ### 4.10.2 Translation and harmonization of columns # The imported data is standardized with respect to the columns as defined in section [2.4](#2.6-Definition-of-harmonized-output-scheme). In a first step, existing and output-relevant columns are translated and remaining columns are deleted in a second step. Columns which are not exist in the data set, but required for the output are additionally added in this process. # In[ ]: # Rename sixth column data_UK.columns.values[5] = 'Location' # Drop rows without station names, so that the footnotes at the end of the list are deleted data_UK = data_UK.dropna(subset=['Station Name']) # Drop columns not needed anymore colsToDrop = ['Footnotes'] data_UK = data_UK.drop(colsToDrop, axis=1) # Rename columns dict_columns_UK = {'Company Name': 'company', 'Station Name': 'name', 'Installed Capacity (MW)': 'capacity', 'Country': 'country', 'Location': 'location', 'Fuel': 'energy_source', 'Year of commission or year generation began': 'commissioned', 'Source': 'source'} data_UK.rename(columns=dict_columns_UK, inplace=True) # Check if all columns have been renamed for columnnames in data_UK.columns: if columnnames not in dict_columns_UK.values(): logger.error("Not renamed column: " + columnnames) # Adjust names of region dict_regions_UK = {'East': 'England', 'East Midlands': 'England', 'London': 'England', 'North East': 'England', 'North West': 'England', 'South East': 'England', 'South West': 'England', 'West Midlands': 'England', 'Yorkshire and the Humber': 'England', 'N Ireland': 'Northern Ireland'} data_UK["location"].replace(dict_regions_UK, inplace=True) # Merge columns "Country" and "Location" to one column called "Country" data_UK['additional_info'] = data_UK[['location']].apply( lambda x: 'Region: {}'.format(x[0]), axis=1) # Drop column "Location" after 
# Drop the "location" column after its content moved to "additional_info"
data_UK = data_UK.drop(columns=['location'])

# Adjust commissioning years: keep a single year where a range was listed
data_UK["commissioned"].replace({'1926/2002': '2002'}, inplace=True)

# Apply general template of columns
data_UK = data_UK.reindex(columns=columns_sorted)

# Adjust types of entries in all columns
data_UK['capacity'] = data_UK['capacity'].astype(float)

# ### 4.10.3 Definition of generation technology types

# Generation of entries for the column "technology" according to information
# given in the column "energy_source".

# In[ ]:

# Infer the technology from the fuel / energy-source label
source_to_technology_UK = {
    'Hydro / pumped storage': 'Pumped storage',
    'Pumped storage': 'Pumped storage',
    'Wind': 'Onshore',
    'Wind (offshore)': 'Offshore',
    'Nuclear': 'Steam turbine',
    'CCGT': 'Combined cycle',
    'OCGT': 'Gas turbine',
}
for source_label, technology in source_to_technology_UK.items():
    matches = data_UK['energy_source'] == source_label
    data_UK.loc[matches, 'technology'] = technology

# ### 4.10.4 Definition of energy sources

# Overall translation of all energy source types mentioned in the column
# "energy_source" and subsequent translation check. Deletion of rows
# containing "wind" as energy source.
# In[ ]:

# Translate UK fuel labels into the harmonized energy-source vocabulary
dict_energysources_UK = {'Biomass': 'Biomass and biogas',
                         'Biomass / gas / waste derived fuel': 'Mixed fossil fuels',
                         'CCGT': 'Natural gas',
                         'Coal': 'Hard coal',
                         'Coal / biomass': 'Mixed fossil fuels',
                         'Coal / biomass / gas / waste derived fuel': 'Mixed fossil fuels',
                         'Coal / oil': 'Mixed fossil fuels',
                         'Diesel': 'Oil',
                         'Gas': 'Natural gas',
                         'Gas / oil': 'Mixed fossil fuels',
                         'Gas oil': 'Oil',
                         'Gas oil / kerosene': 'Oil',
                         'Hydro': 'Hydro',
                         'Hydro / pumped storage': 'Hydro',
                         'Light oil': 'Oil',
                         'Meat & bone meal': 'Other bioenergy and renewable waste',
                         'Nuclear': 'Nuclear',
                         'OCGT': 'Natural gas',
                         'Oil': 'Oil',
                         'Pumped storage': 'Hydro',
                         'Straw': 'Biomass and biogas',
                         'Waste': 'Non-renewable waste',
                         'Wind': 'Wind',
                         'Wind (offshore)': 'Wind',
                         'Solar': 'Solar'}
data_UK["energy_source"].replace(dict_energysources_UK, inplace=True)

# Check if all energy sources have been translated.
# BUGFIX: the previous guard compared str(energysource) against "NaN", but
# str(float('nan')) is the lowercase 'nan', so missing values were never
# excluded and produced spurious "Not renamed" error logs. pd.notna() is the
# correct missing-value test.
for energysource in data_UK["energy_source"].unique():
    if energysource not in dict_energysources_UK.values() and pd.notna(energysource):
        logger.error("Not renamed energy source: " + str(energysource))

# Delete unwanted energy sources
data_UK = data_UK[data_UK.energy_source != 'Wind']
data_UK = data_UK[data_UK.energy_source != 'Solar']

# ### 4.10.5 Additional information on geographic coordinates and EIC codes

# In this section a manually compiled list is used to define the geographic
# coordinates of individual power plants.
# In[ ]:

# Access the second list with EIC codes and geographic coordinates
location_file_UK = os.path.join('input', 'input_plant_locations_UK.csv')
additional_data_UK = pd.read_csv(
    location_file_UK, encoding='utf-8', header=0, index_col=None)

# Merge the manually compiled coordinates into the plant list by name; the
# pre-merge coordinate/EIC columns receive the '_x' suffix ...
data_UK = data_UK.merge(additional_data_UK, left_on='name', right_on='name',
                        how='left', suffixes=('_x', ''))
# ... and are dropped again after the merger.
data_UK = data_UK.drop(['eic_code_x', 'lat_x', 'lon_x'], axis=1)
data_UK.head()

# ## 4.11 Norway NO

# The data is provided by the power exchange
# [**Nordpool**](http://www.nordpoolspot.com/globalassets/download-center/tso/generation-capacity_norway_valid-from-2-december-2013_larger-than-100mw.pdf).
# It encompasses a detailed list of Norwegian generation units with a capacity
# of more than 100 MW for 2013.

# In[ ]:

# Access local file
norway_file = os.path.join('input', 'input_plant-list_NO.csv')
data_NO = pd.read_csv(norway_file, encoding='utf-8', header=0, index_col=None)
data_NO.head()

# ## 4.12 Sweden SE

# The data is provided by the power exchange
# [**Nordpool**](http://www.nordpoolspot.com/globalassets/download-center/tso/generation-capacity_sweden_larger-than-100mw-per-unit_17122014.pdf).
# It encompasses a detailed list of Swedish generation units with a capacity
# of more than 100 MW for 2014.

# In[ ]:

# Access local file
sweden_file = os.path.join('input', 'input_plant-list_SE.csv')
data_SE = pd.read_csv(sweden_file, encoding='utf-8', header=0, index_col=None)
data_SE.head()

# ## 4.13 Slovakia SK

# The data is provided by the Slovakian utility
# [**Slovenské elektrárne a.s. (SEAS)**](https://www.seas.sk/power-plants).
# It encompasses a detailed list of Slovak generation units with comprehensive
# information on technologies and energy fuels.
# In[ ]:

# Access local file
filepath = os.path.join('input', 'input_plant-list_SK.csv')
data_SK = pd.read_csv(filepath, encoding='utf-8', header=0, index_col=None)

# Delete unwanted energy source
data_SK = data_SK[data_SK.energy_source != 'Solar']
data_SK.head()

# ## 4.14 Slovenia SI

# The data is provided by several Slovenian utilities. The respective data
# links are given in the column "source". This list encompasses Slovenian
# generation units with comprehensive information on technologies and energy
# fuels.

# In[ ]:

# Access local file
filepath = os.path.join('input', 'input_plant-list_SI.csv')
data_SI = pd.read_csv(filepath, encoding='utf-8', header=0, index_col=None)
data_SI.head()

# ## 4.15 Austria AT

# The data for **conventional power plants** is provided by several Austrian
# utilities. The respective data links are given in the column "source". The
# specifications of Austrian **hydro power plants**, however, solely are based
# on [**Verbund AG**](https://www.verbund.com/de-at/ueber-verbund/kraftwerke/unsere-kraftwerke).
# The resulting list encompasses Austrian generation units with comprehensive
# information on technologies and energy fuels.

# In[ ]:

# Access local file for conventional plants
filepath_conventional = os.path.join(
    'input', 'input_plant-list_AT_conventional.csv')
data_AT_conventional = pd.read_csv(
    filepath_conventional, encoding='utf-8', header=0, index_col=None)

# Access local file for hydro plants
filepath_hydro = os.path.join('input', 'input_plant-list_AT_hydro.csv')
data_AT_hydro = pd.read_csv(
    filepath_hydro, encoding='utf-8', header=0, index_col=None)

# Merge the lists. FIX: DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0; pd.concat with ignore_index=True produces the identical
# result.
data_AT = pd.concat([data_AT_conventional, data_AT_hydro], ignore_index=True)
data_AT.head()

# ## 4.16 Denmark DK

# The data is provided by the Danish transmission network operator
# [**Energinet.dk**](https://www.energinet.dk/SiteCollectionDocuments/Engelske%20dokumenter/El/Energinet%20dk%27s%20assumptions%20for%20analysis%202014-2035,%20September%202014.xlsm).
# It encompasses a detailed list of Danish generation units with comprehensive
# information on technologies and energy fuels.

# In[ ]:

# Access local file for conventional plants
filepath = os.path.join('input', 'input_plant-list_DK.csv')
data_DK = pd.read_csv(
    filepath, encoding='utf-8', header=0, index_col=None)

# List only operating plants: rows flagged '0' (offline) or 'partly'
# available are removed. NOTE(review): the comparison is against the string
# '0', so 'availability' is presumably read as text — confirm against the
# input file.
data_DK = data_DK[data_DK.availability != '0']
data_DK = data_DK[data_DK.availability != 'partly']
data_DK.head()

# # 5. Consolidation of processed country data

# In the following, the national datasets are consolidated to a single
# European dataset. Unfortunately, the Belgian dataset cannot be integrated
# due to the copyright by the data owner ELIA.

# In[ ]:

# Stack all national frames; data_BE stays commented out for the copyright
# reason stated above.
dataframes = [  # data_BE,
    data_NL, data_FR, data_PL, data_CZ, data_CH, data_IT, data_FI, data_ES,
    data_UK, data_NO, data_SE, data_SK, data_SI, data_AT, data_DK]
data_EU = pd.concat(dataframes)

# ## 5.1 Implementation of energy source levels

# In[ ]:

# Import energy source level definition
energy_source_levels = pd.read_csv(
    os.path.join('input', 'energy_source_levels.csv'),
    index_col=None, header=0)

# Merge energy source levels to data set. A plant's 'energy_source' may match
# the hierarchy at level 1, 2 or 3, so the lookup table is merged three
# times, once per level. drop_duplicates undoes the row multiplication caused
# by non-unique join keys; on the later merges, clashing column names get
# pandas' default '_x' (left) / '_y' (right) suffixes.
data_EU = data_EU.reset_index().merge(
    energy_source_levels, how='left', left_on='energy_source',
    right_on='energy_source_level_1').drop_duplicates(
    subset=['name', 'city', 'country', 'capacity'],
    keep='first').set_index('name')
data_EU = data_EU.reset_index().merge(
    energy_source_levels, how='left', left_on='energy_source',
    right_on='energy_source_level_2').drop_duplicates(
    subset=['name', 'city', 'country', 'capacity'],
    keep='first').set_index('name')
data_EU = data_EU.reset_index().merge(
    energy_source_levels, how='left', left_on='energy_source',
    right_on='energy_source_level_3').drop_duplicates(
    subset=['name', 'city', 'country', 'capacity'],
    keep='first').set_index('name')

# Combine the different energy source level columns created by the merges:
# fillna('') plus the row-wise sum concatenates the string columns, of which
# at most one is expected to be non-empty per row.
# NOTE(review): level 2 combines only the unsuffixed and '_y' variants while
# level 1 also includes '_x' — confirm this asymmetry is intended.
data_EU['energy_source_level_1'] = data_EU[
    ['energy_source_level_1', 'energy_source_level_1_x',
     'energy_source_level_1_y']].fillna('').sum(axis=1)
data_EU['energy_source_level_2'] = data_EU[
    ['energy_source_level_2', 'energy_source_level_2_y']].fillna('').sum(axis=1)
data_EU['energy_source_level_3'] = data_EU[
    ['energy_source_level_3']].fillna('').sum(axis=1)

# Drop auxiliary columns due to merge
colsToDrop = ['energy_source_level_1_y', 'energy_source_level_2_y',
              'energy_source_level_3_y', 'energy_source_level_1_x',
              'energy_source_level_2_x', 'energy_source_level_3_x']
data_EU = data_EU.drop(colsToDrop, axis=1)
data_EU.head()

# ## 5.2 Definition of structure and data types

# First, we define the ordering of the columns. Secondly, the data types are
# redefined. At the moment, this has the drawback that empty columns are
# redefined as `float` instead of `object`.

# In[ ]:

# Harmonized column order for the final output
columns_sorted_output = ['name', 'company', 'street', 'postcode', 'city',
                         'country', 'capacity', 'energy_source', 'technology',
                         'chp', 'commissioned', 'type', 'lat', 'lon',
                         'eic_code', 'energy_source_level_1',
                         'energy_source_level_2', 'energy_source_level_3',
                         'additional_info', 'comment', 'source']

# Set ordering of columns
data_EU = data_EU.reset_index()
data_EU = data_EU.reindex(columns=columns_sorted_output)

# Set data types for columns: cast everything to str first, then the numeric
# columns back to float; the literal 'nan' strings produced by the str cast
# are restored to real NaN afterwards.
data_EU = data_EU.astype(str)
data_EU[['capacity', 'commissioned', 'lat', 'lon']] = data_EU[
    ['capacity', 'commissioned', 'lat', 'lon']].astype(float)
data_EU.replace('nan', np.nan, inplace=True)
# data_EU.dtypes

# Set index
data_EU = data_EU.set_index('name')
data_EU.head()

# # 6. Documentation of the data package

# We document the data packages meta data in the specific format JSON as
# proposed by the Open Knowledge Foundation. See the Frictionless Data project
# by OKFN (http://data.okfn.org/) and the Data Package specifications
# (http://dataprotocols.org/data-packages/) for more details.
#
# In order to keep the notebook more readable, we first formulate the metadata
# in the human-readable YAML format using a multi-line string.
# We then parse the string into a Python dictionary and save that to disk as
# a JSON file.

# In[ ]:

# Read the datapackage metadata.
# FIX: plain yaml.load() without an explicit Loader is deprecated and can
# construct arbitrary Python objects; yaml.safe_load restricts parsing to
# standard YAML tags. If metadata.yml ever uses custom tags, switch to
# yaml.load(f, Loader=yaml.FullLoader) instead. YAML files are UTF-8 by
# specification, so the encoding is made explicit.
with open(os.path.join('input', 'metadata.yml'), 'r', encoding='utf-8') as f:
    metadata = yaml.safe_load(f)

datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '))

# # 7. Result export

# ## 7.1 Write results to file

# In[ ]:

output_path = 'output'

# CSV and Excel exports of the consolidated European plant list
data_EU.to_csv(
    os.path.join(output_path, 'conventional_power_plants_EU.csv'),
    encoding='utf-8', index_label='name')
data_EU.to_excel(
    os.path.join(output_path, 'conventional_power_plants_EU.xlsx'),
    sheet_name='plants', index_label='name')

# SQLite export. FIX: the connection was previously created inline and never
# closed; close it explicitly so the database file is released.
conn = sqlite3.connect(
    os.path.join(output_path, 'conventional_power_plants.sqlite'))
try:
    data_EU.to_sql('conventional_power_plants_EU', conn,
                   if_exists="replace", index_label='name')
finally:
    conn.close()

# Write the datapackage descriptor next to the data files
with open(os.path.join(output_path, 'datapackage.json'), 'w',
          encoding='utf-8') as f:
    f.write(datapackage_json)

# ## 7.2 Write checksums

# In[ ]:

files = [
    'conventional_power_plants_DE.csv',
    'conventional_power_plants_DE.xlsx',
    'conventional_power_plants_EU.csv',
    'conventional_power_plants_EU.xlsx',
    'conventional_power_plants.sqlite']

# One "<file>,<sha hash>" line per exported file
with open('checksums.txt', 'w') as f:
    for file_name in files:
        file_hash = get_sha_hash(os.path.join(output_path, file_name))
        f.write('{},{}\n'.format(file_name, file_hash))