#!/usr/bin/env python # coding: utf-8 #
# Renewable power plants: Validation and output notebook # #
# This notebook is part of the Renewable power plants Data Package of Open Power System Data. #
# Part 1 of the script (Download and process Notebook) has downloaded and merged the original data. This Notebook subsequently checks and validates the list of renewable power plants and creates CSV/XLSX/SQLite files. It also generates a daily time series of cumulated installed capacities by energy source. # # *(Before running this script make sure you ran Part 1, so that the renewables.pickle files for each country exist in the same folder as the scripts)* # #

# Table of Contents

#
# # Initialization

# In[ ]:

# Release identifiers; they are used for the output folder name and are
# copied into the metadata further below.
settings = {
    'version': '2018-03-08',
    'changes': 'Fixing incorrect coordinates in previous version.'
}

# ## Script setup

# In[ ]:

import hashlib
import json
import logging
import os
import re
import urllib.parse
import zipfile

import numpy as np
import pandas as pd
import requests
import sqlalchemy
import yaml

get_ipython().run_line_magic('matplotlib', 'inline')

# Let pandas display up to 40 columns per DataFrame (the default is 20)
pd.options.display.max_columns = 40

# Make sure the input and output folders exist
package_path = os.path.join('output', 'renewable_power_plants',
                            settings['version'])
for folder in (
        os.path.join('input', 'original_data'),
        'output',
        os.path.join('output', 'renewable_power_plants'),
        package_path,
):
    os.makedirs(folder, exist_ok=True)
del folder

# ## Load data

# In[ ]:

countries = {'DE', 'DK', 'FR', 'PL', 'CH'}
countries_non_DE = countries - {'DE'}
countries_dirty = {'DE_outvalidated_plants', 'FR_overseas_territories'}
countries_including_dirty = countries | countries_dirty

# Read the per-country pickles written by Part 1 (download_and_process)
dfs = {}
for country in countries:
    pickle_path = 'intermediate/{}_renewables.pickle'.format(country)
    dfs[country] = pd.read_pickle(pickle_path)
del pickle_path

# # Validation Markers
#
# This section checks the DataFrame for a set of pre-defined criteria and adds
# markers to the entries in an additional column. The marked data will be
# included in the output files, but marked, so that they can be easily
# filtered out. For creating the validation plots and the time series,
# suspect data is skipped.
# ## Germany DE
# **Add marker to data according to criteria (see validation_marker above)**

# In[ ]:

# mark_rows maps a marker code to a boolean row mask over dfs['DE'];
# validation_marker maps the same code to its explanations, which are later
# written out as a table of their own.
mark_rows = {}
validation_marker = {}

de = dfs['DE']  # alias for the same DataFrame object, not a copy

key = 'R_1'
mark_rows[key] = (
    (de['commissioning_date'] <= '2016-12-31')
    & de['data_source'].isin(['BNetzA', 'BNetzA_PV', 'BNetzA_PV_historic'])
)
validation_marker[key] = {
    "Short explanation": "data_source = BNetzA and commissioning_date < 2016-12-31",
    "Long explanation": "This powerplant is probably also represented by an entry from the TSO data and should therefore be filtered out."
}

key = 'R_2'
mark_rows[key] = ((de['notification_reason'] != 'Inbetriebnahme')
                  & (de['data_source'] == 'BNetzA'))
validation_marker[key] = {
    "Short explanation": "notification_reason other than commissioning (Inbetriebnahme)",
    "Long explanation": "This powerplant is probably represented by an earlier entry already (possibly also from the TSO data) and should therefore be filtered out."
}

key = 'R_3'
mark_rows[key] = de['commissioning_date'].isnull()
validation_marker[key] = {
    "Short explanation": "commissioning_date not specified",
    "Long explanation": ""
}

key = 'R_4'
mark_rows[key] = de['electrical_capacity'] <= 0.0
validation_marker[key] = {
    "Short explanation": "electrical_capacity not specified",
    "Long explanation": ""
}

key = 'R_5'
# Just the entry which is not double should be kept, thus the other one is
# marked
mark_rows[key] = de['grid_decommissioning_date'].notnull()
validation_marker[key] = {
    "Short explanation": "decommissioned from the grid",
    "Long explanation": "This powerplant is probably commissioned again to the grid of another grid operator and therefore this doubled entry should be filtered out."
}

key = 'R_6'
mark_rows[key] = de['decommissioning_date'].notnull()
validation_marker[key] = {
    "Short explanation": "decommissioned",
    "Long explanation": "This powerplant is completely decommissioned."
}

# Note that R_7 is skipped here: it is used for the French overseas power
# plants below (the meaning of an R marker is never changed, so R_7 stays
# reserved for that).
key = 'R_8'
# keep='first' depends on the BNetzA items being last in the list, because
# the TSO items are the ones to keep.
mark_rows[key] = (de.duplicated(['eeg_id'], keep='first')
                  & de['eeg_id'].notnull())
validation_marker[key] = {
    "Short explanation": "duplicate_eeg_id",
    "Long explanation": "This power plant is twice in the data (e.g. through BNetzA and TSOs)."
}

# Append every matching marker code, each followed by "|", to the comment
de['comment'] = ''
for key, rows_to_mark in mark_rows.items():
    de.loc[rows_to_mark, 'comment'] += key + "|"

del mark_rows, key, rows_to_mark  # free variables no longer needed

# In[ ]:

# Summarize capacity of suspect data by data_source
# (notebook display expressions; they have no effect when run as a script)
de.groupby(['comment', 'data_source'])['electrical_capacity'].sum().to_frame()

# Summarize capacity of suspect data by energy source
de.groupby(['comment', 'energy_source_level_2'])['electrical_capacity'].sum().to_frame()

del de

# **Create cleaned DataFrame**
#
# All marked entries are deleted for the cleaned version of the DataFrame
# that is utilized for creating time series of installation and for the
# validation plots.

# ## France FR

# In[ ]:

fr = dfs['FR']  # alias for the same DataFrame object, not a copy

# Create empty marker column
fr['comment'] = ""

key = 'R_7'
# Rows whose coordinates fall outside the box lat >= 41, -6 <= lon <= 10
rows_not_in_europe = (fr['lat'] < 41) | (fr['lon'] < -6) | (fr['lon'] > 10)
validation_marker[key] = {
    "Short explanation": "not connected to the European grid",
    "Long explanation": "This powerplant is located in regions belonging to France but not located in Europe (e.g. Guadeloupe)."
}
fr.loc[rows_not_in_europe, 'comment'] += key + "|"

del rows_not_in_europe, fr

# # Harmonization

# ## Harmonizing column order

# In[ ]:

# Output columns per country, in output order; columns that are not listed
# are dropped.
field_lists = {
    'DE': ['commissioning_date', 'decommissioning_date',
           'energy_source_level_1', 'energy_source_level_2',
           'energy_source_level_3', 'technology',
           'electrical_capacity', 'thermal_capacity',
           'voltage_level', 'tso', 'dso', 'dso_id', 'eeg_id', 'bnetza_id',
           'federal_state', 'postcode', 'municipality_code', 'municipality',
           'address', 'address_number',
           'utm_zone', 'utm_east', 'utm_north', 'lat', 'lon',
           'data_source', 'comment'],
    'DK': ['commissioning_date',
           'energy_source_level_1', 'energy_source_level_2', 'technology',
           'electrical_capacity', 'dso', 'gsrn_id',
           'postcode', 'municipality_code', 'municipality',
           'address', 'address_number',
           'utm_east', 'utm_north', 'lat', 'lon',
           'hub_height', 'rotor_diameter', 'manufacturer', 'model',
           'data_source'],
    'FR': ['municipality_code', 'municipality',
           'energy_source_level_1', 'energy_source_level_2',
           'energy_source_level_3', 'technology',
           'electrical_capacity', 'number_of_installations',
           'lat', 'lon', 'data_source', 'comment'],
    'PL': ['district',
           'energy_source_level_1', 'energy_source_level_2',
           'energy_source_level_3', 'technology',
           'electrical_capacity', 'number_of_installations',
           'lat', 'lon', 'data_source'],
    'CH': ['commissioning_date', 'municipality',
           'energy_source_level_1', 'energy_source_level_2',
           'energy_source_level_3', 'technology',
           'electrical_capacity', 'municipality_code', 'project_name',
           'production', 'tariff', 'notification_date',
           'contract_period_end', 'street', 'canton', 'company',
           'lat', 'lon', 'data_source'],
}

for country in countries:
    dfs[country] = dfs[country].loc[:, field_lists[country]]

# ## Cleaning fields
#
# Five digits behind the decimal point for decimal fields. Dates should be
# without timestamp.
# In[ ]:

# cleaning type -> country -> columns that receive that cleaning
cleaning_specs = {
    'decimal': {
        'DE': ['electrical_capacity', 'lat', 'lon', 'utm_east', 'utm_north',
               'thermal_capacity'],
        'DK': ['electrical_capacity', 'lat', 'lon', 'utm_east', 'utm_north'],
        'CH': ['electrical_capacity', 'lat', 'lon'],
        'FR': ['electrical_capacity', 'lat', 'lon'],
        'PL': ['electrical_capacity'],
    },
    'integer': {
        'DE': ['utm_zone'],
    },
    'date': {
        'DE': ['commissioning_date', 'decommissioning_date'],
        'DK': ['commissioning_date'],
        'CH': ['commissioning_date'],
    },
}

for cleaning_type, cleaning_spec in cleaning_specs.items():
    for country, fields in cleaning_spec.items():
        for field in fields:
            # Report the cleaning type actually applied (the message used to
            # say "decimal" for every type).
            print('Cleaning ' + country + '.' + field + ' to ' + cleaning_type + '.')
            if cleaning_type == 'decimal':
                dfs[country][field] = dfs[country][field].map(lambda x: round(x, 5))
            elif cleaning_type == 'integer':
                numeric = pd.to_numeric(dfs[country][field], errors='coerce')
                # Format as digits without decimals. Missing values become ''
                # rather than the string 'nan', which the fillna('') step
                # below could not blank out any more.
                dfs[country][field] = numeric.map(
                    lambda x: '' if pd.isnull(x) else '%.0f' % x)
            elif cleaning_type == 'date':
                # Strip the time-of-day part
                dfs[country][field] = dfs[country][field].apply(lambda x: x.date())

del cleaning_specs

# ## Sort

# In[ ]:

sort_by = {
    'DE': 'commissioning_date',
    'DK': 'commissioning_date',
    'CH': 'commissioning_date',
    'FR': 'municipality_code',
    'PL': 'district',
}

for country, sort_column in sort_by.items():
    # sort_values orders rows by label; the previous .iloc[...index] form
    # treated index labels as positions, which is only right when the index
    # is exactly 0..n-1.
    dfs[country] = dfs[country].sort_values(by=sort_column)

del sort_by, sort_column

# ## Leave unspecified cells blank

# In[ ]:

for country in countries:
    dfs[country].fillna('', inplace=True)

# ## Separate dirty from clean
#
# We separate all plants which have a validation marker in the comments
# column into a separate DataFrame and eventually also in a separate CSV
# file, so the main country files only contain "clean" plants, i.e. those
# without any special comment. This is useful since all our comments denote
# that most people would probably not like to include them in their
# calculations.
# In[ ]:

# Move every row that carries a validation marker into its own DataFrame
for country in ['DE', 'FR']:
    idx_dirty = dfs[country][dfs[country].comment.str.len() > 1].index
    dirty_key = 'DE_outvalidated_plants' if country == 'DE' else 'FR_overseas_territories'
    dfs[dirty_key] = dfs[country].loc[idx_dirty]
    dfs[country] = dfs[country].drop(idx_dirty)

del idx_dirty, dirty_key

# # Capacity time series
#
# This section creates a daily and yearly time series of the cumulated
# installed capacity by energy source. This data will be part of the output
# and will be compared in a plot for validation in the next section.

# In[ ]:

# Additional (temporary) column for choosing energy sources for time series
dfs['DE']['temp_energy_source'] = dfs['DE']['energy_source_level_2']

# Time series for on- and offshore wind should be separated; for hydro the
# subtype should be used because all is run-of-river
idx_subtype = dfs['DE'][(dfs['DE'].energy_source_level_2 == 'Wind') |
                        (dfs['DE'].energy_source_level_2 == 'Hydro')].index

dfs['DE'].loc[idx_subtype, 'temp_energy_source'] = dfs['DE'].loc[
    idx_subtype, 'technology']

# Energy sources for which time series should be generated
energy_sources = ['Solar', 'Onshore', 'Offshore', 'Bioenergy',
                  'Geothermal', 'Run-of-river']

# Set range of time series as index
timeseries_yearly = pd.DataFrame(
    index=pd.date_range(start='1990-01-01', end='2017-12-31', freq='A'))
timeseries_daily = pd.DataFrame(
    index=pd.date_range(start='2005-01-01', end='2017-12-31', freq='D'))

del idx_subtype

# In[ ]:

# Create cumulated time series per energy source for both yearly and daily
# time series
for gtype in energy_sources:
    is_gtype = dfs['DE']['temp_energy_source'] == gtype
    temp = dfs['DE'].loc[is_gtype, ['commissioning_date', 'electrical_capacity']]

    temp_timeseries = temp.set_index('commissioning_date')
    temp_timeseries.index = pd.DatetimeIndex(temp_timeseries.index)
    capacity = temp_timeseries['electrical_capacity']

    # Cumulated capacity per year. Assignment aligns on the target index, so
    # dates after the last commissioning stay NaN at this point.
    timeseries_yearly[gtype] = capacity.resample('A').sum().cumsum().ffill()

    # Cumulated capacity per day
    timeseries_daily[gtype] = capacity.resample('D').sum().cumsum().ffill()

del energy_sources, is_gtype, temp, temp_timeseries, capacity
dfs['DE'].drop('temp_energy_source', axis=1, inplace=True)

# In[ ]:

# Forward-fill the gaps that index alignment left in these daily series
for column in ['Onshore', 'Offshore', 'Bioenergy', 'Geothermal', 'Run-of-river']:
    timeseries_daily[column] = timeseries_daily[column].ffill()
del column

# Shorten timestamp to year for the yearly time series
timeseries_yearly.index = pd.to_datetime(timeseries_yearly.index, format="%Y").year

# Show yearly timeseries of installed capacity in MW per energy source level 2
# (notebook display expression; no effect when run as a script)
timeseries_yearly

# **Reset index of timeseries.**

# In[ ]:

# Time index is not required any more
timeseries_yearly = timeseries_yearly.reset_index()
timeseries_daily = timeseries_daily.reset_index()

# Set index name
timeseries_yearly.rename(columns={'index': 'year'}, inplace=True)
timeseries_daily.rename(columns={'index': 'day'}, inplace=True)

# # Output
# This section finally writes the Data Package:
# * CSV + XLSX + SQLite
# * Meta data (JSON)

# In[ ]:

os.makedirs(package_path, exist_ok=True)

# ## Write data files

# ** Write CSV-files**
#
# One csv-file for each country. This process will take some time depending
# on your hardware.
# In[ ]:

# Base file/table name (without extension) per DataFrame key; the separated
# ("dirty") tables get a different prefix.
table_names = {}
for country in countries_including_dirty:
    if country in countries_dirty:
        table_names[country] = 'res_plants_separated_' + country
    else:
        table_names[country] = 'renewable_power_plants_' + country

    csv_path = os.path.join(package_path, table_names[country] + '.csv')
    dfs[country].to_csv(
        csv_path,
        sep=',',
        decimal='.',
        date_format='%Y-%m-%d',
        line_terminator='\n',
        encoding='utf-8',
        index=False,
    )

# Write daily cumulated time series as csv
csv_path = os.path.join(package_path, 'renewable_capacity_timeseries_DE.csv')
timeseries_daily.to_csv(
    csv_path,
    sep=',',
    float_format='%.3f',
    decimal='.',
    date_format='%Y-%m-%d',
    encoding='utf-8',
    index=False,
)

# Write csv of marker explanations: one row per marker, with the column
# order reversed and the marker code as a regular column.
validation_marker_df = pd.DataFrame(validation_marker).T.iloc[:, ::-1]
validation_marker_df.index.name = 'Validation marker'
validation_marker_df = validation_marker_df.reset_index()

csv_path = os.path.join(package_path, 'validation_marker.csv')
validation_marker_df.to_csv(
    csv_path,
    sep=',',
    decimal='.',
    date_format='%Y-%m-%d',
    line_terminator='\n',
    encoding='utf-8',
    index=False,
)
del csv_path

# ** Write XLSX-file**
#
# This process will take some time depending on your hardware.
#
# All country power plant lists will be written in one xlsx-file. Each
# country power plant list is written in a separate sheet. As the German
# power plant list has too many entries for one sheet, it will be split in
# two. An additional sheet includes the explanations of the markers.
# In[ ]:

# Write the results as xlsx file. Statements are run through IPython's %time
# magic so each step reports its duration; this requires an IPython session.
get_ipython().run_line_magic('time', "writer = pd.ExcelWriter(os.path.join(package_path, 'renewable_power_plants.xlsx'), engine='xlsxwriter', date_format='yyyy-mm-dd')")

# The German list is split across two sheets at row 1,000,000
print('Writing DE part 1')
get_ipython().run_line_magic('time', "dfs['DE'][:1000000].to_excel(writer, index=False, sheet_name='DE part-1')")

print('Writing DE part 2')
get_ipython().run_line_magic('time', "dfs['DE'][1000000:].to_excel(writer, index=False, sheet_name='DE part-2')")

# Sorted so the sheet order does not depend on set iteration order
for country in sorted(countries_non_DE | countries_dirty):
    print('Writing ' + country)
    get_ipython().run_line_magic('time', 'dfs[country].to_excel(writer, index=False, sheet_name=country)')

print('Writing validation marker sheet')
get_ipython().run_line_magic('time', "validation_marker_df.to_excel(writer, index=False, sheet_name='validation_marker')")

print('Saving...')
# close() writes the workbook to disk; it replaces the deprecated save()
get_ipython().run_line_magic('time', 'writer.close()')
print('...done!')

# **Write SQLite**

# In[ ]:

# The decommissioning_date column is giving the engine some trouble,
# therefore cast to string:
#dfs['DE'].decommissioning_date = dfs['DE'].decommissioning_date.astype(str)
#dfs['DE'].commissioning_date = dfs['DE'].commissioning_date.astype(str)

# Using chunksize parameter is for lower memory computers. Removing it might
# speed things up.
engine = sqlalchemy.create_engine('sqlite:///' + package_path + '/renewable_power_plants.sqlite')

# Sorted so the table creation order does not depend on set iteration order
for country in sorted(countries_including_dirty):
    # The magic name was previously empty ('' with 'time ...' as argument),
    # which IPython cannot resolve.
    get_ipython().run_line_magic('time', 'dfs[country].to_sql(table_names[country],engine,if_exists="replace",chunksize=100000,index=False)')

validation_marker_df.to_sql('validation_marker', engine, if_exists="replace", chunksize=100000, index=False)
timeseries_daily.to_sql('renewable_capacity_timeseries_DE', engine, if_exists="replace", chunksize=100000, index=False)

# ## Write meta data
#
# The Data Packages meta data are created in the specific JSON format as
# proposed by the Open Knowledge Foundation.
# Please see the Frictionless Data project by OKFN (http://data.okfn.org/)
# and the Data Package specifications
# (http://dataprotocols.org/data-packages/) for more details.
#
# In order to keep the Jupyter Notebook more readable the metadata is written
# in the human-readable YAML format using a multi-line string, which is then
# parsed into a Python dictionary and saved as a JSON file.

# In[ ]:

# NOTE(review): a few entries below parse to null or look out of step with
# the written files (empty `type:` of utm_zone, empty `name:` of the day
# column, field names of validation_marker.csv and of the Bioenergy
# column) -- confirm against the actual outputs before the next release.
metadata = """
hide: yes
name: opsd-renewable-power-plants
title: Renewable power plants
description: List of renewable energy power stations
long_description: >-
  This Data Package contains a list of renewable energy power plants in lists of
  renewable energy-based power plants of Germany, Denmark, France and Poland.
  Germany: More than 1.7 million renewable power plant entries, eligible under the
  renewable support scheme (EEG).
  Denmark: Wind and phovoltaic power plants with a high level of detail.
  France: Aggregated capacity and number of installations per energy source per
  municipality (Commune).
  Poland: Summed capacity and number of installations per energy source
  per municipality (Powiat).
  Switzerland: Renewable power plants eligible under the Swiss feed in tariff KEV
  (Kostendeckende Einspeisevergütung)
  Due to different data availability, the power plant lists are of different
  accurancy and partly provide different power plant parameter. Due to that, the
  lists are provided as seperate csv-files per country and as separate sheets in the
  excel file. Suspect data or entries with high probability of duplication are marked
  in the column 'comment'. Theses validation markers are explained in the file
  validation_marker.csv. Filtering all entries with comments out results in the recommended
  data set.
  Additionally, the Data Package includes a daily time series of cumulated
  installed capacity per energy source type for Germany. All data processing is
  conducted in Python and pandas and has been documented in the Jupyter Notebooks linked below.
keywords: [master data register,power plants,renewables,germany,denmark,france,poland,switzerland,open power system data]
geographical-scope: Germany, Denmark, France, Poland, Switzerland
resources:
  - path: renewable_power_plants_DE.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: commissioning_date
          type: date
          format: YYYY-MM-DD
          description: Date of commissioning of specific unit
        - name: decommissioning_date
          type: date
          format: YYYY-MM-DD
          description: Date of decommissioning of specific unit
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: energy_source_level_3
          description: Subtype of energy source (e.g. Biomass and biogas)
          type: string
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
          unit: MW
        - name: thermal_capacity
          description: Installed thermal capacity in MW
          type: number
          format: float
          unit: MW
        - name: voltage_level
          description: Voltage level of grid connection
          type: string
        - name: tso
          description: Name of transmission system operator of the area the plant is located
          type: string
        - name: dso
          description: Name of distribution system operator of the region the plant is located in
          type: string
        - name: dso_id
          description: Company number of German distribution grid operator
          type: string
        - name: eeg_id
          description: Power plant EEG (German feed-in tariff law) remuneration number
          type: string
        - name: bnetza_id
          description: Power plant identification number by BNetzA
          type: string
        - name: federal_state
          description: Name of German administrative level 'Bundesland'
          type: string
        - name: postcode
          description: German zip-code
          type: string
        - name: municipality_code
          description: German Gemeindenummer (municipalitiy number)
          type: string
        - name: municipality
          description: Name of German Gemeinde (municipality)
          type: string
        - name: address
          description: Street name or name of land parcel
          type: string
        - name: address_number
          description: House number or number of land parcel
          type: string
        - name: utm_zone
          description: Universal Transverse Mercator zone value
          type:
        - name: utm_east
          description: Coordinate in Universal Transverse Mercator (east)
          type: numeric
          format: float
        - name: utm_north
          description: Coordinate in Universal Transverse Mercator (north)
          type: numeric
          format: float
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: data_source
          description: Source of database entry
          type: string
        - name: comment
          description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
          type: string
  - path: renewable_power_plants_DK.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: commissioning_date
          type: date
          format: YYYY-MM-DD
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
        - name: dso
          description: Name of distribution system operator of the region the plant is located in
          type: string
        - name: gsrn_id
          description: Danish wind turbine identifier number (GSRN)
          type: number
          format: integer
        - name: postcode
          description: Danish zip-code
          type: string
        - name: municipality_code
          description: Danish 3-digit Kommune-Nr
          type: string
        - name: municipality
          description: Name of Danish Kommune
          type: string
        - name: address
          description: Street name or name of land parcel
          type: string
        - name: address_number
          description: House number or number of land parcel
          type: string
        - name: utm_east
          description: Coordinate in Universal Transverse Mercator (east)
          type: numeric
          format: float
        - name: utm_north
          description: Coordinate in Universal Transverse Mercator (north)
          type: numeric
          format: float
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: hub_height
          description: Wind turbine hub heigth in m
          type: numeric
          format: float
        - name: rotor_diameter
          description: Wind turbine rotor diameter in m
          type: numeric
          format: float
        - name: manufacturer
          description: Company that has built the wind turbine
          type: string
        - name: model
          description: Wind turbind model type
          type: string
        - name: data_source
          description: Source of database entry
          type: string
  - path: renewable_power_plants_FR.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: municipality_code
          description: French 5-digit INSEE code for Communes
          type: string
        - name: municipality
          description: Name of French Commune
          type: string
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: energy_source_level_3
          description: Subtype of energy source (e.g. Biomass and biogas)
          type: string
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
        - name: number_of_installations
          description: Number of installations of the energy source subtype in the municipality
          type: number
          format: integer
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: data_source
          description: Source of database entry
          type: string
  - path: renewable_power_plants_PL.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: district
          description: Name of the Polish powiat
          type: string
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: energy_source_level_3
          description: Subtype of energy source (e.g. Biomass and biogas)
          type: string
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
        - name: number_of_installations
          description: Number of installations of the energy source subtype in the district
          type: number
          format: integer
        - name: data_source
          description: Source of database entry
          type: string
  - path: renewable_power_plants_CH.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: commissioning_date
          type: date
          format: YYYY-MM-DD
        - name: municipality
          type: string
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
        - name: municipality_code
          type: number
          format: integer
        - name: project_name
          description: name of the project
          type: string
        - name: production
          description: yearly production in MWh
          type: numeric
          format: float
        - name: tariff
          description: tariff in CHF for 2016
          type: numeric
          format: float
        - name: notification_date
          description: date of data entriy at BFE
          type: date
          format: YYYY-MM-DD HH:MM:SS.SSSSS
        - name: street
          description: Street name
          type: string
        - name: canton
          description: name of the cantones/ member states of the Swiss conferderation
          type: string
        - name: company
          description: name of the company
          type: string
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: data_source
          description: Source of database entry
          type: string
  - path: res_plants_separated_DE_outvalidated_plants.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: commissioning_date
          type: date
          format: YYYY-MM-DD
          description: Date of commissioning of specific unit
        - name: decommissioning_date
          type: date
          format: YYYY-MM-DD
          description: Date of decommissioning of specific unit
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: energy_source_level_3
          description: Subtype of energy source (e.g. Biomass and biogas)
          type: string
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
          unit: MW
        - name: thermal_capacity
          description: Installed thermal capacity in MW
          type: number
          format: float
          unit: MW
        - name: voltage_level
          description: Voltage level of grid connection
          type: string
        - name: tso
          description: Name of transmission system operator of the area the plant is located
          type: string
        - name: dso
          description: Name of distribution system operator of the region the plant is located in
          type: string
        - name: dso_id
          description: Company number of German distribution grid operator
          type: string
        - name: eeg_id
          description: Power plant EEG (German feed-in tariff law) remuneration number
          type: string
        - name: bnetza_id
          description: Power plant identification number by BNetzA
          type: string
        - name: federal_state
          description: Name of German administrative level 'Bundesland'
          type: string
        - name: postcode
          description: German zip-code
          type: string
        - name: municipality_code
          description: German Gemeindenummer (municipalitiy number)
          type: string
        - name: municipality
          description: Name of German Gemeinde (municipality)
          type: string
        - name: address
          description: Street name or name of land parcel
          type: string
        - name: address_number
          description: House number or number of land parcel
          type: string
        - name: utm_zone
          description: Universal Transverse Mercator zone value
          type:
        - name: utm_east
          description: Coordinate in Universal Transverse Mercator (east)
          type: numeric
          format: float
        - name: utm_north
          description: Coordinate in Universal Transverse Mercator (north)
          type: numeric
          format: float
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: data_source
          description: Source of database entry
          type: string
        - name: comment
          description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
          type: string
  - path: res_plants_separated_FR_overseas_territories.csv
    format: csv
    encoding: UTF-8
    missingValue: ""
    schema:
      fields:
        - name: municipality_code
          description: French 5-digit INSEE code for Communes
          type: string
        - name: municipality
          description: Name of French Commune
          type: string
        - name: energy_source_level_1
          description: Type of energy source (e.g. Renewable energy)
          type: string
        - name: energy_source_level_2
          description: Type of energy source (e.g. Wind, Solar)
          type: string
          opsd-contentfilter: "true"
        - name: energy_source_level_3
          description: Subtype of energy source (e.g. Biomass and biogas)
          type: string
        - name: technology
          description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
          type: string
        - name: electrical_capacity
          description: Installed electrical capacity in MW
          type: number
          format: float
        - name: number_of_installations
          description: Number of installations of the energy source subtype in the municipality
          type: number
          format: integer
        - name: lat
          description: Latitude coordinates
          type: geopoint
          format: lat
        - name: lon
          description: Longitude coordinates
          type: geopoint
          format: lon
        - name: data_source
          description: Source of database entry
          type: string
  - path: renewable_power_plants.xlsx
    format: xlsx
  - path: validation_marker.csv
    format: csv
    encoding: UTF-8
    mediatype: text/csv
    missingValue: ""
    schema:
      fields:
        - name: Validation_Marker
          description: Name of validation marker utilized in column comment in the renewable_power_plant_germany.csv
          type: string
        - name: Explanation
          description: Comment explaining meaning of validation marker
          type: string
  - path: renewable_capacity_timeseries_DE.csv
    format: csv
    encoding: UTF-8
    mediatype: text/csv
    missingValue: ""
    schema:
      fields:
        - name:
          description: Day
          type: datetime
          format: YYYY-MM-DD
        - name: Solar
          description: Cumulated electrical solar capacity in MW
          type: number
          format: float
          unit: MW
        - name: Onshore
          description: Cumulated electrical wind onshore capacity in MW
          type: number
          format: float
          unit: MW
        - name: Offshore
          description: Cumulated electrical wind offshore capacity in MW
          type: number
          format: float
          unit: MW
        - name: Bioenergy and renewable waste
          description: Cumulated electrical bioenergy and renewable waste capacity in MW
          type: number
          format: float
          unit: MW
        - name: Geothermal
          description: Cumulated electrical geothermal capacity in MW
          type: number
          format: float
          unit: MW
        - name: Run-of-river
          description: Cumulated electrical run-of-river capacity in MW
          type: number
          format: float
          unit: MW
licenses:
  - type: MIT license
    url: http://www.opensource.org/licenses/MIT
sources:
  - name: BNetzA
    web: http://www.bundesnetzagentur.de/cln_1422/DE/Sachgebiete/ElektrizitaetundGas/Unternehmen_Institutionen/ErneuerbareEnergien/Anlagenregister/Anlagenregister_Veroeffentlichung/Anlagenregister_Veroeffentlichungen_node.html
    source: Bundesnetzagentur register of renewable power plants (excl. PV)
  - name: BNetzA_PV
    web: http://www.bundesnetzagentur.de/cln_1431/DE/Sachgebiete/ElektrizitaetundGas/Unternehmen_Institutionen/ErneuerbareEnergien/Photovoltaik/DatenMeldgn_EEG-VergSaetze/DatenMeldgn_EEG-VergSaetze_node.html
    source: Bundesnetzagentur register of PV power plants
  - name: TransnetBW, TenneT, Amprion, 50Hertz, Netztransparenz.de
    web: https://www.netztransparenz.de/de/Anlagenstammdaten.htm
    source: Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area)
  - name: Postleitzahlen Deutschland
    web: http://www.suche-postleitzahl.org/downloads
    source: Zip codes of Germany linked to geo-information
  - name: Energinet.dk
    web: http://www.energinet.dk/SiteCollectionDocuments/Danske%20dokumenter/El/SolcelleGraf.xlsx
    source: register of Danish wind power plants
  - name: Energistyrelsen
    web: http://www.ens.dk/sites/ens.dk/files/info/tal-kort/statistik-noegletal/oversigt-energisektoren/stamdataregister-vindmoeller/anlaegprodtilnettet.xls
    source: ens.dk - register of Danish PV power plants
  - name: GeoNames
    web: http://download.geonames.org/export/zip/
    source: geonames.org
  - name: Ministry for the Ecological and Inclusive Transition
    web: http://www.statistiques.developpement-durable.gouv.fr/energie-climat/r/differentes-energies-energies-renouvelables.html?tx_ttnews[tt_news]=25029&cHash=005200fdf3c7976410f38ae53cd17e0b
  - name: OpenDataSoft
    web: http://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/download/?format=csv&refine.statut=Commune%20simple&timezone=Europe/Berlin&use_labels_for_header=true
    source: Code Postal - Code INSEE
  - name: Urzad Regulacji Energetyki (URE)
    web: http://www.ure.gov.pl/uremapoze/mapa.html
    source: Energy Regulatory Office of Poland
  - name: Bundesamt für Energie (BFE)
    web: http://www.bfe.admin.ch/themen/00612/02073/index.html?dossier_id=02166&lang=de
    source: Swiss Federal Office of Energy
contributors:
  - name: Ingmar Schlecht
    email: schlecht@neon-energie.de
    web: http://open-power-system-data.org/
views: True
openpowersystemdata-enable-listing: True
"""

# safe_load builds only plain Python objects; yaml.load without an explicit
# Loader is unsafe and is rejected by PyYAML >= 6.
metadata = yaml.safe_load(metadata)

metadata['last_changes'] = settings['changes']
metadata['version'] = settings['version']
metadata['documentation'] = 'https://github.com/Open-Power-System-Data/renewable_power_plants/blob/'+settings['version']+'/main.ipynb'

datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '))

# Write the information of the metadata
with open(os.path.join(package_path, 'datapackage.json'), 'w') as f:
    f.write(datapackage_json)

# ## Generate checksums
#
# Generates checksums.txt

# In[ ]:


def get_sha_hash(path, blocksize=65536):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in binary mode in chunks of *blocksize* bytes, so it
    never has to fit into memory as a whole.
    """
    sha_hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file)
        for buffer in iter(lambda: f.read(blocksize), b''):
            sha_hasher.update(buffer)
    return sha_hasher.hexdigest()


files = [
    'validation_marker.csv',
    'renewable_capacity_timeseries_DE.csv',
    'renewable_power_plants.sqlite',
    'renewable_power_plants.xlsx',
]
files.extend(table_names[country] + '.csv'
             for country in countries_including_dirty)

# checksums.txt is written to the current working directory (not into
# package_path), one "<file name>,<sha256>" line per file, sorted by name.
with open('checksums.txt', 'w') as f:
    for file_name in sorted(files):
        file_hash = get_sha_hash(os.path.join(package_path, file_name))
        f.write('{},{}\n'.format(file_name, file_hash))

# In[ ]: