#!/usr/bin/env python
# coding: utf-8
#
# Part 1 of the script (Download and process Notebook) has downloaded and merged the original data. This Notebook subsequently checks, validates the list of renewable power plants and creates CSV/XLSX/SQLite files. It also generates a daily time series of cumulated installed capacities by energy source.
#
# *(Before running this script make sure you ran Part 1, so that the renewables.pickle files for each country exist in the same folder as the scripts)*
#
# Table of Contents
#
# # Initialization
# In[ ]:
settings = {
'version': '2019-04-05',
'changes': 'Updated all countries with new data available (DE, FR, CH, DK), added data for UK and expanded renewable capacity timeseries to more countries (DK, UK, CH in addition to DE).'
}
# ## Script setup
# In[ ]:
import json
import logging
import os
import urllib.parse
import re
import zipfile
import pandas as pd
import numpy as np
import requests
import sqlalchemy
import yaml
import hashlib
import os
import fiona
import cartopy.io.shapereader as shpreader
import shapely.geometry as sgeom
from shapely.prepared import prep
from shapely.ops import unary_union
import fake_useragent
get_ipython().run_line_magic('matplotlib', 'inline')
# Option to make pandas display 40 columns max per dataframe (default is 20)
pd.options.display.max_columns = 40
# Create input and output folders if they don't exist
os.makedirs(os.path.join('input', 'original_data'), exist_ok=True)
os.makedirs('output', exist_ok=True)
os.makedirs(os.path.join('output', 'renewable_power_plants'), exist_ok=True)
package_path = os.path.join('output', 'renewable_power_plants',settings['version'])
os.makedirs(package_path, exist_ok=True)
# ## Load data
# In[ ]:
countries = set(['DE', 'DK','FR','PL','CH', 'UK'])
countries_non_DE = countries - set(['DE'])
countries_dirty = set(['DE_outvalidated_plants', 'FR_overseas_territories'])
countries_including_dirty = countries | countries_dirty
# Read data from script Part 1 download_and_process
dfs = {}
for country in countries:
dfs[country] = pd.read_pickle('intermediate/'+country+'_renewables.pickle')
# ## Download coastline data
#
# The coastline shapefile is needed to check if the geocoordinates of the land powerplants point to a land location, and conversely, if the geocoordinates of the onshore facilities point to a location not on land.
# In[ ]:
coastline_url = 'https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip'
user_agent = fake_useragent.UserAgent()
directory_path = os.path.join('input', 'maps', 'coastline')
os.makedirs(directory_path, exist_ok=True)
filepath = os.path.join(directory_path, 'gshhg-shp-2.3.7.zip')
# check if the file exists; if not, download it
if not os.path.exists(filepath):
session = requests.session()
print(coastline_url)
print('Downloading...')
headers = {'User-Agent' : user_agent.random}
r = session.get(coastline_url, headers=headers, stream=True)
total_size = r.headers.get('content-length')
total_size = int(total_size)
chuncksize = 4096
with open(filepath, 'wb') as file:
downloaded = 0
for chunck in r.iter_content(chuncksize):
file.write(chunck)
downloaded += chuncksize
print('\rProgress: {:.2f}%'.format(100 * downloaded / float(total_size)), end='')
print(' Done.')
zip_ref = zipfile.ZipFile(filepath, 'r')
zip_ref.extractall(directory_path)
zip_ref.close()
else:
print('The file is already there:', filepath)
filepath = '' + filepath
coastline_shapefile_path = os.path.join('input', 'maps', 'coastline', 'GSHHS_shp', 'f', 'GSHHS_f_L1.shp')
print("Shapefile path: ", coastline_shapefile_path)
# ### Load the list of sources
# In[ ]:
source_df = pd.read_csv(os.path.join('input', 'sources.csv'))
source_df
# # Validation Markers
#
# This section checks the DataFrame for a set of pre-defined criteria and adds markers to the entries in an additional column. The marked data will be included in the output files, but marked, so that they can be easiliy filtered out. For creating the validation plots and the time series, suspect data is skipped.
# In[ ]:
validation_marker = {}
mark_rows = {}
# ## Germany DE
# **Add marker to data according to criteria (see validation_marker above)**
# In[ ]:
# It seems that some DE.commissioning_date values are integers, which causes
# the parts of the code dealing with dates to break.
integer_dates_mask = dfs['DE'].apply(lambda row: type(row['commissioning_date']) is int, axis=1).values
print("Integer dates")
display(dfs['DE'][integer_dates_mask])
dfs['DE']=dfs['DE'][~integer_dates_mask]
dfs['DE'].reset_index(drop=True, inplace=True)
# In[ ]:
# In[ ]:
key = 'R_1'
cutoff_date_bnetza = '2017-12-31'
cutoff_date_bnetza = pd.Timestamp(2017, 12, 31)
mark_rows[key] = (dfs['DE']['commissioning_date'] <= cutoff_date_bnetza) &\
(dfs['DE']['data_source'].isin(['BNetzA', 'BNetzA_PV', 'BNetzA_PV_historic']))
validation_marker[key] = {
"Short explanation": "data_source = BNetzA and commissioning_date < " + str(cutoff_date_bnetza.date()),
"Long explanation": "This powerplant is probably also represented by an entry from the TSO data and should therefore be filtered out."
}
# In[ ]:
key = 'R_2'
mark_rows[key] = ((dfs['DE']['notification_reason'] != 'Inbetriebnahme') &
(dfs['DE']['data_source'] == 'BNetzA'))
validation_marker[key] = {
"Short explanation": "notification_reason other than commissioning (Inbetriebnahme)",
"Long explanation": "This powerplant is probably represented by an earlier entry already (possibly also from the TSO data) and should therefore be filtered out."
}
key = 'R_3'
mark_rows[key] = (dfs['DE']['commissioning_date'].isnull())
validation_marker[key] = {
"Short explanation": "commissioning_date not specified",
"Long explanation": ""
}
key = 'R_4'
mark_rows[key] = dfs['DE'].electrical_capacity <= 0.0
validation_marker[key] = {
"Short explanation": "electrical_capacity not specified",
"Long explanation": ""
}
key = 'R_5'
mark_rows[key] = dfs['DE']['grid_decommissioning_date'].isnull() == False # Just the entry which is not double should be kept, thus the other one is marked
validation_marker[key] = {
"Short explanation": "decommissioned from the grid",
"Long explanation": "This powerplant is probably commissioned again to the grid of another grid operator and therefore this doubled entry should be filtered out."
}
key = 'R_6'
mark_rows[key] = dfs['DE']['decommissioning_date'].isnull() == False
validation_marker[key] = {
"Short explanation": "decommissioned",
"Long explanation": "This powerplant is completely decommissioned."
}
key = 'R_8' # note that we skip R7 here as R7 is used for frech oversees power plants below (we never change meanings of R markers, so R7 stays reserved for that)
mark_rows[key] = (dfs['DE'].duplicated(['eeg_id'],keep='first') # note that this depends on BNetzA items to be last in list, because we want to keep the TSO items
& (dfs['DE']['eeg_id'].isnull() == False))
validation_marker[key] = {
"Short explanation": "duplicate_eeg_id",
"Long explanation": "This power plant is twice in the data (e.g. through BNetzA and TSOs)."
}
# In[ ]:
dfs['DE']['comment'] = ''
for key, rows_to_mark in mark_rows.items():
dfs['DE'].loc[rows_to_mark, 'comment'] += key+"|"
del mark_rows, key, rows_to_mark # free variables no longer needed
# In[ ]:
# Summarize capacity of suspect data by data_source
display(dfs['DE'].groupby(['comment', 'data_source'])['electrical_capacity'].sum().to_frame())
# Summarize capacity of suspect data by energy source
dfs['DE'].groupby(['comment', 'energy_source_level_2'])['electrical_capacity'].sum().to_frame()
# **Create cleaned DataFrame**
#
# All marked entries are deleted for the cleaned version of the DataFrame that is utilized for creating time series of installation and for the validation plots.
# ## France FR
# In[ ]:
# Create empty marker column
dfs['FR']['comment'] = ""
key = 'R_7'
mark_rows_FR_not_in_Europe = dfs['FR'][((dfs['FR']['lat'] < 41) |
(dfs['FR']['lon'] < -6) |
(dfs['FR']['lon'] > 10))].index
validation_marker[key] = {
"Short explanation": "not connected to the European grid",
"Long explanation": "This powerplant is located in regions belonging to France but not located in Europe (e.g. Guadeloupe)."
}
dfs['FR'].loc[mark_rows_FR_not_in_Europe, 'comment'] += key+"|"
del mark_rows_FR_not_in_Europe
# ## United Kingdom UK
# In[ ]:
# Create an empty marker column
dfs['UK']['comment'] = ""
# Create a function to check if an offshore powerplant is not wind
geoms = fiona.open(coastline_shapefile_path)
land_geom = sgeom.MultiPolygon([sgeom.shape(geom['geometry']) for geom in geoms])
land = prep(land_geom)
def not_on_land_but_should_be(powerplant_data):
longitude = powerplant_data['lon']
latitude = powerplant_data['lat']
if pd.isnull(longitude) or pd.isnull(latitude):
return False
not_on_land = not land.contains(sgeom.Point(longitude, latitude))
offshore_ok = 'Offshore' in [powerplant_data['region'], powerplant_data['municipality']] or \
(powerplant_data['energy_source_level_2'] in ['Wind', 'Marine'])
return not_on_land and not offshore_ok
key = 'R_9'
validation_marker[key] = {
"Short explanation": "Not on land, but should be.",
"Long explanation": "The geocoordinates of this powerplant indicate that it is not on the UK mainland, but the facility is not an offshore wind farm."
}
mark_rows_UK_not_on_land = dfs['UK'].apply(lambda row: not_on_land_but_should_be(row), axis=1)
dfs['UK'].loc[mark_rows_UK_not_on_land, 'comment'] += key+"|"
del mark_rows_UK_not_on_land
# # Harmonization
# ## Harmonizing column order
# In[ ]:
field_lists = {
'DE': ['commissioning_date', 'decommissioning_date', 'energy_source_level_1', 'energy_source_level_2','energy_source_level_3', 'technology',
'electrical_capacity', 'voltage_level', 'tso', 'dso', 'dso_id', 'eeg_id',
'federal_state', 'postcode', 'municipality_code', 'municipality', 'address',
'lat', 'lon', 'data_source', 'comment'],
'DK': ['commissioning_date', 'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3', 'technology', 'electrical_capacity',
'dso', 'gsrn_id', 'postcode', 'municipality_code', 'municipality', 'address',
'lat', 'lon', 'hub_height', 'rotor_diameter', 'manufacturer', 'model', 'data_source'],
'FR': ['municipality_code', 'municipality', 'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3',
'technology', 'electrical_capacity', 'number_of_installations', 'lat', 'lon', 'data_source', 'as_of_year', 'comment'],
'PL': ['district', 'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3', 'technology',
'electrical_capacity', 'number_of_installations', 'data_source', 'as_of_year'],
'CH': ['commissioning_date', 'municipality', 'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3',
'technology','electrical_capacity', 'municipality_code', 'project_name', 'production', 'tariff',
'contract_period_end', 'street', 'canton', 'company', 'lat', 'lon', 'data_source'],
'UK': ['commissioning_date', 'uk_beis_id', 'site_name', 'operator',
'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3', 'technology',
'electrical_capacity', 'chp', 'capacity_individual_turbine',
'number_of_turbines', 'solar_mounting_type', 'address', 'municipality', 'region', 'country', 'postcode',
'lat', 'lon', 'data_source', 'comment']
}
for country in field_lists:
for field in field_lists[country]:
if field not in dfs[country].columns:
print(country, field)
dfs[country] = dfs[country].loc[:, field_lists[country]]
# ## Cleaning fields
#
# Five digits behind the decimal point for decimal fields. Dates should be without timestamp.
# In[ ]:
dfs['DE']['address'][~dfs['DE']['address'].isnull()]
dfs['DK'].columns
# In[ ]:
cleaning_specs = {
'decimal' : {
'DE': ['electrical_capacity','lat','lon'],
'DK': ['electrical_capacity','lat','lon'],
'CH': ['electrical_capacity','lat','lon'],
'FR': ['electrical_capacity','lat','lon'],
'PL': ['electrical_capacity'],
'UK': ['electrical_capacity', 'lat', 'lon']
},
'integer': {
'DE': ['municipality_code'],
'UK': ['uk_beis_id']
},
'date': {
'DE': ['commissioning_date', 'decommissioning_date'],
'DK': ['commissioning_date'],
'CH': ['commissioning_date'],
'UK': ['commissioning_date']
},
'one-line string': {
'DE' : ['federal_state', 'municipality', 'address'],
'DK' : ['municipality', 'address', 'manufacturer', 'model'],
'FR' : ['municipality'],
'PL' : ['district'],
'CH' : ['municipality', 'project_name', 'canton', 'street', 'company'],
'UK' : ['address', 'municipality', 'site_name', 'region']
}
}
def to_1_line(string):
if pd.isnull(string) or not isinstance(string, str):
return string
return string.replace('\r', '').replace('\n', '')
for cleaning_type, cleaning_spec in cleaning_specs.items():
for country, fields in cleaning_spec.items():
for field in fields:
print('Cleaning ' + country + '.' + field +' to ' + cleaning_type + '.')
if cleaning_type == 'decimal':
dfs[country][field] = dfs[country][field].map(lambda x: round(x, 8))
elif cleaning_type == 'integer':
dfs[country][field] = pd.to_numeric(dfs[country][field], errors='coerce')
dfs[country][field] = dfs[country][field].map(lambda x: '%.0f' % x)
elif cleaning_type == 'date':
dfs[country][field] = dfs[country][field].apply(lambda x: x.date())
elif cleaning_type == 'one-line string':
dfs[country][field] = dfs[country][field].apply(lambda x: to_1_line(x))
print('Done!')
del cleaning_specs
# ## Sort
# In[ ]:
sort_by = {
'DE': 'commissioning_date',
'DK': 'commissioning_date',
'CH': 'commissioning_date',
'FR': 'municipality_code',
'PL': 'district',
'UK': 'commissioning_date'
}
for country, sort_by in sort_by.items():
print('Sorting', country)
try:
dfs[country] = dfs[country].iloc[dfs[country][sort_by].sort_values().index]
except Exception as e:
print('\tException:',e)
print('Done!')
del sort_by
# ## Leave unspecified cells blank
#
# This step may take some time.
# In[ ]:
for country in countries:
print(country)
dfs[country].fillna('', inplace=True)
# ## Separate dirty from clean
#
# We separate all plants which have a validation marker in the comments column into a separate DataFrame and eventually also in a separate CSV file, so the main country files only contain "clean" plants, i.e. those without any special comment. This is useful since all our comments denote that most people would probably not like to include them in their calculations.
# In[ ]:
# In[ ]:
dirty_keys = {
'DE' : 'DE_outvalidated_plants',
'FR' : 'FR_overseas_territories',
}
for country in dirty_keys.keys():
print(country)
idx_dirty = dfs[country][dfs[country].comment.str.len() > 1].index
dirty_key = dirty_keys[country]
dfs[dirty_key] = dfs[country].loc[idx_dirty]
dfs[country] = dfs[country].drop(idx_dirty)
del idx_dirty, dirty_key
# # Capacity time series
#
# This section creates a daily and yearly time series of the cumulated installed capacity by energy source for the United Kingdom, Germany, Denmark, and Switzerland. Three time series will be created for the UK: one for the whole country (GB-UKM), one for Northern Ireland (GB-NIR), and one for the Great Britain (GB-GBN). This data will be part of the output and will be compared in a plot for validation in the next section.
# In[ ]:
daily_timeseries = {}
# In[ ]:
def to_new_level(row):
if(row['energy_source_level_2'] == 'Wind'):
energy_type_label = (row['energy_source_level_2']+'_'+row['technology']).lower()
else:
energy_type_label = row['energy_source_level_2'].lower()
return energy_type_label
def to_daily_timeseries(df, start_date, end_date):
# Combine energy levels to new standardized values
df['energy_type'] = df[['energy_source_level_2', 'energy_source_level_3', 'technology']].apply(to_new_level, axis=1)
# Set range of time series as index
daily_timeseries = pd.DataFrame(index=pd.date_range(start=start_date, end=end_date, freq='D'))
# Create cumulated time series per energy source for both yearly and daily time series
for energy_type in df['energy_type'].unique():
temp = (df[['commissioning_date', 'electrical_capacity']]
.loc[df['energy_type'] == energy_type])
temp_timeseries = temp.set_index('commissioning_date')
temp_timeseries.index = pd.DatetimeIndex(temp_timeseries.index)
# Create cumulated time series per energy_source and day
daily_timeseries[energy_type] = temp_timeseries.resample('D').sum().cumsum().fillna(method='ffill')
# Make sure that the columns are properly filled
daily_timeseries[energy_type]= daily_timeseries[energy_type].fillna(method='ffill').fillna(value=0)
# Reset the time index
daily_timeseries.reset_index(inplace=True)
# Set the index name
daily_timeseries.rename(columns={'index': 'day'}, inplace=True)
# Drop the temporary column "energy_type"
df.drop('energy_type', axis=1, inplace=True)
return daily_timeseries
eligible_for_timeseries = [country for country in countries if 'commissioning_date' in dfs[country].columns]
#eligible_for_timeseries = ['CH', 'DK', 'UK', 'DE']
possible_start_dates = [dfs[country]['commissioning_date'].min() for country in eligible_for_timeseries]
possible_end_dates = [dfs[country]['commissioning_date'].max() for country in eligible_for_timeseries]
#print("Possible start and end dates:")
#for country in eligible_for_timeseries:
# print(country, dfs[country]['commissioning_date'].min(), dfs[country]['commissioning_date'].max())
start_date = min(possible_start_dates)
end_date = max(possible_end_dates)
for country in eligible_for_timeseries:
print("Timeseries for", country)
try:
daily_timeseries[country] = to_daily_timeseries(dfs[country], start_date, end_date)
print('\t Done!')
except Exception as e:
print('\t', e)
# ## Make separate series for Great Britain and Northern Ireland
# In[ ]:
# Create the mask for Northern Ireland
ni_mask = dfs['UK']['country'] == 'Northern Ireland'
# Split the UK data
ni_df = dfs['UK'][ni_mask].copy()
gb_df = dfs['UK'][~ni_mask].copy()
# Make the timeseries for Northern Ireland
daily_timeseries['GB-NIR'] = to_daily_timeseries(ni_df, start_date, end_date)
# Make the timeseries for Great Briatin (England, Wales, Scotland)
daily_timeseries['GB-GBN'] = to_daily_timeseries(gb_df, start_date, end_date)
# Renaming the entry for UK to conform to the ISO codes
daily_timeseries['GB-UKM'] = daily_timeseries.pop('UK')
# ## Create total wind columns
# In[ ]:
# Create column "wind" as sum of onshore and offshore
for country in daily_timeseries:
onshore = 'wind_onshore' in daily_timeseries[country].columns
offshore = 'wind_offshore' in daily_timeseries[country].columns
if onshore and offshore:
daily_timeseries[country]['wind'] = daily_timeseries[country]['wind_onshore'] + daily_timeseries[country]['wind_offshore']
elif onshore and not offshore:
daily_timeseries[country]['wind'] = daily_timeseries[country]['wind_onshore']
elif (not onshore) and offshore:
daily_timeseries[country]['wind'] = daily_timeseries[country]['wind_offshore']
# ## Create one time series file containing al countries
# In[ ]:
unified_daily_timeseries = pd.DataFrame(index=pd.date_range(start=start_date, end=end_date, freq='D'))
# Append the country name to capacity columns' names
for c in daily_timeseries:
new_columns = [c + "_" + col + "_capacity" if col != 'day' else 'day' for col in daily_timeseries[c].columns]
daily_timeseries[c].columns = new_columns
# Unify separate series
unified_daily_timeseries = pd.concat(daily_timeseries.values(), axis=1, sort=False)
# Make sure the day column appears only one
days = unified_daily_timeseries['day']
unified_daily_timeseries.drop('day', axis=1, inplace=True)
unified_daily_timeseries['day'] = days.iloc[:, 0]
# In[ ]:
# sort columns alphabetically
unified_daily_timeseries = unified_daily_timeseries.reindex(sorted(unified_daily_timeseries.columns), axis=1)
unified_daily_timeseries = unified_daily_timeseries.set_index('day').reset_index() # move day column to first position
# drop column DE_Other fossil fuels (we don't want fossil fuels in here as they don't belong into renewables)
# and hydro is not all of hydro but only subsidised hydro, which could be misleading
unified_daily_timeseries.drop(columns='DE_other fossil fuels_capacity', inplace=True)
unified_daily_timeseries.drop(columns='DE_hydro_capacity', inplace=True)
# Show some rows
unified_daily_timeseries.tail(2)
# # Make the normalized dataframe for all the countries
#
# Here, we create a dataframe containing the following data for all the countries:
#
# In[ ]:
geographical_resolution = {
'PL' : 'district',
'FR' : 'municipality',
'CH' : 'municipality',
'DE' : 'power plant',
'DK' : 'power plant',
'UK' : 'power plant'
}
dfs_to_concat = []
columns = ['energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3', 'electrical_capacity',
'data_source', 'municipality', 'lon', 'lat', 'commissioning_date', 'geographical_resolution',
'as_of_year'
]
for country in countries:
country_df = dfs[country].loc[:, columns].copy()
country_df['country'] = country
country_df['geographical_resolution'] = geographical_resolution[country]
if country == 'PL':
country_df['as_of_year'] = 2016
elif country == 'FR':
country_df['as_of_year'] = 2017
dfs_to_concat.append(country_df)
european_df = pd.concat(dfs_to_concat)
european_df.reset_index(inplace=True, drop=True)
european_df.sample(n=10)
# # Output
# This section finally writes the Data Package:
# * CSV + XLSX + SQLite
# * Meta data (JSON)
# In[ ]:
os.makedirs(package_path, exist_ok=True)
# Make sure the daily timeseries has only the date part, not the full datetime with time information
unified_daily_timeseries['day'] = unified_daily_timeseries['day'].dt.date
# ## Write data files
# ### Write CSV-files
#
# One csv-file for each country. This process will take some time depending on you hardware.
# In[ ]:
# Write each country's dataset as a separate csv file
table_names = {}
for country in countries_including_dirty:
print(country)
table_names[country] = 'renewable_power_plants_'+country if country not in countries_dirty else 'res_plants_separated_'+country
dfs[country].to_csv(os.path.join(package_path, table_names[country]+'.csv'),
sep=',',
decimal='.',
date_format='%Y-%m-%d',
line_terminator='\n',
encoding='utf-8',
index=False)
# In[ ]:
# Write daily cumulated time series as csv
unified_daily_timeseries.to_csv(os.path.join(package_path, 'renewable_capacity_timeseries.csv'),
sep=',',
float_format='%.3f',
decimal='.',
date_format='%Y-%m-%d',
encoding='utf-8',
index=False)
print('Done!')
# In[ ]:
european_df.to_csv(os.path.join(package_path, 'renewable_power_plants_EU.csv'),
sep=',',
decimal='.',
date_format='%Y-%m-%d',
line_terminator='\n',
encoding='utf-8',
index=False)
print('Done!')
# In[ ]:
# Write csv of Marker Explanations
validation_marker_df = pd.DataFrame(validation_marker).transpose()
validation_marker_df = validation_marker_df.iloc[:, ::-1] # Reverse column order
validation_marker_df.index.name = 'Validation marker'
validation_marker_df.reset_index(inplace=True)
validation_marker_df.to_csv(os.path.join(package_path, 'validation_marker.csv'),
sep=',',
decimal='.',
date_format='%Y-%m-%d',
line_terminator='\n',
encoding='utf-8',
index=False)
# ### Write XLSX-files
#
# This process will take some time depending on your hardware.
#
# All country power plant list will be written in one xlsx-file. Each country power plant list is written in a separate sheet. As the German power plant list has too many entries for one sheet, it will be split in two. An additional sheet includes the explanations of the marker.
# In[ ]:
# Write the results as xlsx file
get_ipython().run_line_magic('time', "writer = pd.ExcelWriter(os.path.join(package_path, 'renewable_power_plants.xlsx'), engine='xlsxwriter', date_format='yyyy-mm-dd')")
# In[ ]:
print('Writing DE part 1')
get_ipython().run_line_magic('time', "dfs['DE'][:1000000].to_excel(writer, index=False, sheet_name='DE part-1')")
print('Writing DE part 2')
get_ipython().run_line_magic('time', "dfs['DE'][1000000:].to_excel(writer, index=False, sheet_name='DE part-2')")
# In[ ]:
display(dfs.keys())
for country in (countries_non_DE | countries_dirty):
print('Writing ' + country)
get_ipython().run_line_magic('time', 'dfs[country].to_excel(writer, index=False, sheet_name=country)')
print('Writing validation marker sheet')
get_ipython().run_line_magic('time', "validation_marker_df.to_excel(writer, index=False, sheet_name='validation_marker')")
# Save timeseries as Excel
get_ipython().run_line_magic('time', "unified_daily_timeseries.to_excel(writer, index=False, sheet_name='capacity_timeseries')")
print('Saving...')
get_ipython().run_line_magic('time', 'writer.save()')
print('...done!')
# ### Write SQLite
# In[ ]:
# Some date columns are giving the engine some trouble, therefore cast to string:
#if 'DE' in dfs:
# dfs['DE'].decommissioning_date = dfs['DE'].decommissioning_date.astype(str)
# dfs['DE'].commissioning_date = dfs['DE'].commissioning_date.astype(str)
# dfs['DE_outvalidated_plants'].commissioning_date = dfs['DE_outvalidated_plants'].commissioning_date.astype(str)
# Using chunksize parameter is for lower
# memory computers. Removing it might speed things up.
engine = sqlalchemy.create_engine('sqlite:///' + package_path + '/renewable_power_plants.sqlite')
for country in countries_including_dirty:
if country=='DE_outvalidated_plants':
continue # The DE_outvalidated_plants file gives a strange error message. Therefore do not put it into SQLite.
get_ipython().run_line_magic('time', 'dfs[country].to_sql(table_names[country], engine, if_exists="replace", chunksize=100000, index=False)')
validation_marker_df.to_sql('validation_marker', engine, if_exists="replace", chunksize=100000, index=False)
# In[ ]:
# Save timeseries as sqlite
get_ipython().run_line_magic('time', 'european_df.to_sql(\'renewable_power_plants_EU\', engine, if_exists="replace", chunksize=100000, index=False)')
# In[ ]:
# Save timeseries as sqlite
get_ipython().run_line_magic('time', 'unified_daily_timeseries.to_sql(\'renewable_capacity_timeseries\', engine, if_exists="replace", chunksize=100000, index=False)')
# ## Write meta data
#
# The Data Packages meta data are created in the specific JSON format as proposed by the Open Knowledge Foundation. Please see the Frictionless Data project by OKFN (http://data.okfn.org/) and the Data Package specifications (http://dataprotocols.org/data-packages/) for more details.
#
# In order to keep the Jupyter Notebook more readable the metadata is written in the human-readable YAML format using a multi-line string and then parse the string into a Python dictionary and save it as a JSON file.
# In[ ]:
metadata = """
hide: yes
profile: tabular-data-package
_metadataVersion: 1.2
name: opsd_renewable_power_plants
title: Renewable power plants
description: List of renewable energy power stations
longDescription: >-
This Data Package contains a list of renewable energy power plants in lists of
renewable energy-based power plants of Germany, Denmark, France, Switzerland, the
United Kingdom and Poland.
Germany: More than 1.7 million renewable power plant entries, eligible under the
renewable support scheme (EEG).
Denmark: Wind and phovoltaic power plants with a high level of detail.
France: Aggregated capacity and number of installations per energy source per
municipality (Commune).
Poland: Summed capacity and number of installations per energy source
per municipality (Powiat).
Switzerland: Renewable power plants eligible under the Swiss feed in tariff KEV
(Kostendeckende Einspeisevergütung).
United Kingdom: Renewable power plants in the United Kingdom.
Due to different data availability, the power plant lists are of different
accurancy and partly provide different power plant parameter. Due to that, the
lists are provided as seperate csv-files per country and as separate sheets in the
excel file. Suspect data or entries with high probability of duplication are marked
in the column 'comment'. Theses validation markers are explained in the file
validation_marker.csv.
Additionally, the Data Package includes daily time series of cumulated
installed capacity per energy source type for Germany, Denmark, Switzerland and the United Kingdom. All data processing is
conducted in Python and pandas and has been documented in the Jupyter Notebooks linked below.
keywords: [master data register,power plants,renewables,germany,denmark,france,poland,switzerland,united kingdom,open power system data]
spatial:
location: Germany, Denmark, France, Poland, Switzerland, United Kingdom
resolution: Power plants, municipalities
resources:
- path: renewable_power_plants_DE.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: commissioning_date
type: date
format: YYYY-MM-DD
description: Date of commissioning of specific unit
opsdContentfilter: "true"
- name: decommissioning_date
type: date
format: YYYY-MM-DD
description: Date of decommissioning of specific unit
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
unit: MW
- name: voltage_level
description: Voltage level of grid connection
type: string
- name: tso
description: Name of transmission system operator of the area the plant is located
type: string
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: dso_id
description: Company number of German distribution grid operator
type: string
- name: eeg_id
description: Power plant EEG (German feed-in tariff law) remuneration number
type: string
- name: federal_state
description: Name of German administrative level 'Bundesland'
type: string
- name: postcode
description: German zip-code
type: string
- name: municipality_code
description: German Gemeindenummer (municipalitiy number)
type: string
- name: municipality
description: Name of German Gemeinde (municipality)
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: data_source
description: Source of database entry
type: string
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- path: renewable_power_plants_DK.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: commissioning_date
type: date
format: YYYY-MM-DD
opsdContentfilter: "true"
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: gsrn_id
description: Danish wind turbine identifier number (GSRN)
type: integer
- name: postcode
description: Danish zip-code
type: string
- name: municipality_code
description: Danish 3-digit Kommune-Nr
type: string
- name: municipality
description: Name of Danish Kommune
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: hub_height
description: Wind turbine hub heigth in m
type: number
- name: rotor_diameter
description: Wind turbine rotor diameter in m
type: number
- name: manufacturer
description: Company that has built the wind turbine
type: string
- name: model
description: Wind turbine model type
type: string
- name: data_source
description: Source of database entry
type: string
- path: renewable_power_plants_FR.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: municipality_code
description: French 5-digit INSEE code for Communes
type: string
- name: municipality
description: Name of French Commune
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: number_of_installations
description: Number of installations of the energy source subtype in the municipality. Due to confidentiality reasons, the values smaller than 3 are published as ''<3'' (as in the source).
type: integer
bareNumber: false
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: data_source
description: Source of database entry
type: string
- name: as_of_year
description: Year for which the data source compiled the original dataset.
type: integer
- path: renewable_power_plants_PL.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: district
description: Name of the Polish powiat
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
opsdContentfilter: "true"
type: string
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: number_of_installations
description: Number of installations of the energy source subtype in the district
type: integer
- name: data_source
description: Source of database entry
type: string
- name: as_of_year
description: Year for which the data source compiled the original dataset.
type: integer
- path: renewable_power_plants_UK.csv
format: csv
encoding: UTF-8
missingValues: ""
schema:
fields:
- name: commissioning_date
description: Date of commissioning of specific unit
type: date
format: YYYY-MM-DD
opsdContentfilter: "true"
- name: uk_beis_id
description: ID for the plant as assigned by UK BEIS.
type: integer
- name: site_name
description: Name of site
type: string
- name: operator
description: Name of operator
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
opsdContentfilter: "true"
type: string
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
description: Installed electrical capacity in MW
unit: MW
type: number
- name: chp
description: Is the project capable of combined heat and power output
type: string
- name: capacity_individual_turbine
description: For windfarms, the individual capacity of each wind turbine in megawatts (MW)
type: number
- name: number_of_turbines
description: For windfarms, the number of wind turbines located on the site
type: integer
- name: solar_mounting_type
description: For solar PV developments, whether the PV panels are ground or roof mounted
type: string
- name: address
description: Address
type: string
- name: municipality
description: Municipality
type: string
- name: region
description: Region
type: string
- name: country
description: The UK's constituent country in which the facility is located.
type: string
- name: postcode
description: Postcode
type: string
- name: lat
description: Latitude coordinates
type: string
- name: lon
description: Longitude coordinates
type: string
- name: data_source
description: The source of database entries
type: string
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- path: renewable_power_plants_CH.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: commissioning_date
description: Commissioning date
type: date
format: YYYY-MM-DD
opsdContentfilter: "true"
- name: municipality
description: Municipality
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: municipality_code
description: Municipality code
type: integer
- name: project_name
description: Name of the project
type: string
- name: production
description: Yearly production in MWh
type: number
- name: tariff
description: Tariff in CHF for 2016
type: number
- name: contract_period_end
description: End year of subsidy contract
type: number
- name: street
description: Street name
type: string
- name: canton
description: Name of the cantones/ member states of the Swiss confederation
type: string
- name: company
description: Name of the company
type: string
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: data_source
description: Source of database entry
type: string
- path: res_plants_separated_DE_outvalidated_plants.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: commissioning_date
type: date
format: YYYY-MM-DD
description: Date of commissioning of specific unit
- name: decommissioning_date
type: date
format: YYYY-MM-DD
description: Date of decommissioning of specific unit
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
unit: MW
- name: thermal_capacity
description: Installed thermal capacity in MW
type: number
unit: MW
- name: voltage_level
description: Voltage level of grid connection
type: string
- name: tso
description: Name of transmission system operator of the area the plant is located
type: string
- name: dso
description: Name of distribution system operator of the region the plant is located in
type: string
- name: dso_id
description: Company number of German distribution grid operator
type: string
- name: eeg_id
description: Power plant EEG (German feed-in tariff law) remuneration number
type: string
- name: federal_state
description: Name of German administrative level 'Bundesland'
type: string
- name: postcode
description: German zip-code
type: string
- name: municipality_code
description: German Gemeindenummer (municipalitiy number)
type: string
- name: municipality
description: Name of German Gemeinde (municipality)
type: string
- name: address
description: Street name or name of land parcel
type: string
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: data_source
description: Source of database entry
type: string
- name: comment
description: Shortcodes for comments related to this entry, explanation can be looked up in validation_marker.csv
type: string
- path: res_plants_separated_FR_overseas_territories.csv
format: csv
encoding: UTF-8
missingValue: ""
schema:
fields:
- name: municipality_code
description: French 5-digit INSEE code for Communes
type: string
- name: municipality
description: Name of French Commune
type: string
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Subtype of energy source (e.g. Biomass and biogas)
type: string
- name: technology
description: Technology to harvest energy source (e.g. Onshore, Photovoltaics)
type: string
- name: electrical_capacity
unit: MW
description: Installed electrical capacity in MW
type: number
- name: number_of_installations
description: Number of installations of the energy source subtype in the municipality
type: integer
- name: lat
description: Latitude coordinates
type: geopoint
- name: lon
description: Longitude coordinates
type: geopoint
- name: data_source
description: Source of database entry
type: string
- path: renewable_power_plants.xlsx
format: xlsx
- path: validation_marker.csv
format: csv
encoding: UTF-8
mediatype: text/csv
missingValue: ""
schema:
fields:
- name: Validation_Marker
description: Name of validation marker utilized in column comment in the renewable_power_plant_germany.csv
type: string
- name: Explanation
description: Comment explaining meaning of validation marker
type: string
- path: renewable_power_plants_EU.csv
format: csv
encoding: UTF-8
mediatype: text/csv
missingValue: ""
schema:
fields:
- name: energy_source_level_1
description: Type of energy source (e.g. Renewable energy)
type: string
- name: energy_source_level_2
description: Type of energy source (e.g. Wind, Solar)
type: string
opsdContentfilter: "true"
- name: energy_source_level_3
description: Type of energy source (e.g. Biomass and biogas)
type: string
- name: electrical_capacity
description: Installed electrical capacity in MW
unit: MW
type: number
- name: data_source
description: Source of database entry
type: string
- name: municipality
description: The name of the municipality in which the facility is located
type: string
- name: lon
description: Geographical longitude
type: number
- name: lat
description: Geographical latitude
type: number
- name: commissioning_date
type: date
format: YYYY-MM-DD
description: Date of commissioning of specific unit
- name: geographical_resolution
description: Precision of geographical information (exact power plant location, municipality, district)
type:
- name: as_of_year
description: Year for which the data source compiled the corresponding dataset
type: integer
- name: country
description: The country in which the facility is located
type: number
- path: renewable_capacity_timeseries.csv
format: csv
encoding: UTF-8
mediatype: text/csv
missingValue: ""
schema:
fields:
- name: day
type: date
description: The day of the timeseries entry
opsdContentfilter: "true"
- name: CH_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Bioenergy
type: number
source:
name: Own calculation based on plant-level data from Swiss Federal Office of Energy
- name: CH_hydro_capacity
description: Cumulative hydro electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Hydro
type: number
source:
name: Own calculation based on plant-level data from Swiss Federal Office of Energy
- name: CH_solar_capacity
description: Cumulative solar electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from Swiss Federal Office of Energy
- name: CH_wind_capacity
ription: Cumulative total wind electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from Swiss Federal Office of Energy
- name: CH_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Switzerland in MW
unit: MW
opsdProperties:
Region: Switzerland
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from Swiss Federal Office of Energy
- name: DE_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Bioenergy
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_geothermal_capacity
description: Cumulative geothermal electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Geothermal
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_solar_capacity
description: Cumulative solar electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_storage_capacity
description: Cumulative storage electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Storage
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind offshore
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_capacity
ription: Cumulative total wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DE_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Germany in MW
unit: MW
opsdProperties:
Region: Germany
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from BNetzA and Netztransparenz.de
- name: DK_solar_capacity
description: Cumulative solar electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from Energinet.dk
- name: DK_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind offshore
type: number
source:
name: Own calculation based on plant-level data from Danish Energy Agency
- name: DK_wind_capacity
ription: Cumulative total wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from Danish Energy Agency
- name: DK_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Denmark in MW
unit: MW
opsdProperties:
Region: Denmark
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from Danish Energy Agency
- name: GB-GBN_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Bioenergy
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_hydro_capacity
description: Cumulative hydro electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Hydro
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_marine_capacity
description: Cumulative marine electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Marine
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_solar_capacity
description: Cumulative solar electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind offshore
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_wind_capacity
description: Cumulative total wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-GBN_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Great Britain (England, Scotland, Wales) in MW
unit: MW
opsdProperties:
Region: Great Britain (England, Scotland, Wales)
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-NIR_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Bioenergy
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-NIR_marine_capacity
description: Cumulative marine electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Marine
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-NIR_solar_capacity
description: Cumulative solar electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-NIR_wind_capacity
description: Cumulative total wind electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-NIR_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for Northern Ireland in MW
unit: MW
opsdProperties:
Region: Northern Ireland
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_bioenergy_capacity
description: Cumulative bioenergy electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Bioenergy
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_hydro_capacity
description: Cumulative hydro electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Hydro
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_marine_capacity
description: Cumulative marine electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Marine
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_solar_capacity
description: Cumulative solar electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Solar
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_wind_offshore_capacity
description: Cumulative offshore wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind offshore
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_wind_capacity
description: Cumulative total wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind
type: number
source:
name: Own calculation based on plant-level data from BEIS
- name: GB-UKM_wind_onshore_capacity
description: Cumulative onshore wind electrical capacity for the United Kingdom of Great Britain and Northern Ireland in MW
unit: MW
opsdProperties:
Region: United Kingdom of Great Britain and Northern Ireland
Variable: Wind onshore
type: number
source:
name: Own calculation based on plant-level data from BEIS
sources:
- title: BNetzA
path: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/VOeFF_Registerdaten/2018_12_Veroeff_RegDaten.xlsx?__blob=publicationFile&v=2
description: Bundesnetzagentur register of renewable power plants (excl. PV)
- title: BNetzA_PV
path: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Meldungen_Juli17-Dez18.xlsx?__blob=publicationFile&v=2
description: Bundesnetzagentur register of PV power plants
- title: BNetzA_PV_historic
path: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/ErneuerbareEnergien/ZahlenDatenInformationen/PV_Datenmeldungen/Archiv_PV/Meldungen_Aug-Juni2017.xlsx?__blob=publicationFile&v=2
description: Bundesnetzagentur register of PV power plants
- title: TransnetBW, TenneT, Amprion, 50Hertz, Netztransparenz.de
path: https://www.netztransparenz.de/de/Anlagenstammdaten.htm
description: Netztransparenz.de - information platform of German TSOs (register of renewable power plants in their control area)
- title: Postleitzahlen Deutschland
path: http://www.suche-postleitzahl.org/downloads
description: Zip codes of Germany linked to geo-information
- title: Energinet.dk
path: http://www.energinet.dk/SiteCollectionDocuments/Danske%20dokumenter/El/SolcelleGraf.xlsx
description: register of Danish wind power plants
- title: Energistyrelsen
path: https://ens.dk/sites/ens.dk/files/Statistik/anlaegprodtilnettet.xls
description: ens.dk - register of Danish Wind power plants
- title: GeoNames
path: http://download.geonames.org/export/zip/
description: geonames.org
- title: Ministry for the Ecological and Inclusive Transition
path: https://www.statistiques.developpement-durable.gouv.fr/donnees-locales-relatives-aux-installations-de-production-delectricite-renouvelable-beneficiant-0?rubrique=23&dossier=189
- title: OpenDataSoft
path: http://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/download/'\
'?format=csv&refine.statut=Commune%20simple&timezone=Europe/Berlin&use_labels_for_header=true
description: Code Postal - Code INSEE
- title: Urzad Regulacji Energetyki (URE)
path: http://www.ure.gov.pl/uremapoze/mapa.html
description: Energy Regulatory Office of Poland
- title: Bundesamt für Energie (BFE)
path: https://www.bfe.admin.ch/bfe/de/home/foerderung/erneuerbare-energien/einspeiseverguetung/_jcr_content/par/tabs/items/tab/tabpar/externalcontent.external.exturl.xlsx/aHR0cHM6Ly9wdWJkYi5iZmUuYWRtaW4uY2gvZGUvcHVibGljYX/Rpb24vZG93bmxvYWQvOTMxMC54bHN4.xlsx
description: Swiss Federal Office of Energy
- title: UK Government Department of Business, Energy & Industrial Strategy (BEIS)
path: https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract
description: Renewable Energy Planning Database quarterly extract
contributors:
- title: Ingmar Schlecht
role: Maintainer, developer
organization: Neon GmbH
email: schlecht@neon-energie.de
- title: Milos Simic
role: Developer
email: milos.simic.ms@gmail.com
"""
metadata = yaml.load(metadata)
metadata['homepage'] = 'https://data.open-power-system-data.org/renewable_power_plants/'+settings['version']
metadata['id'] = 'https://doi.org/10.25832/renewable_power_plants/'+settings['version']
metadata['last_changes'] = settings['changes']
metadata['version'] = settings['version']
lastYear = int(settings['version'][0:4])-1
metadata['temporal'] = {
'referenceDate': str(lastYear)+'-12-31'
}
metadata['documentation'] = 'https://github.com/Open-Power-System-Data/renewable_power_plants/blob/'+settings['version']+'/main.ipynb'
datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '), ensure_ascii=False)
# Write the information of the metadata
with open(os.path.join(package_path, 'datapackage.json'), 'w', encoding='utf-8') as f:
f.write(datapackage_json)
f.close()
# ## Generate checksums
#
# Generates checksums.txt
# In[ ]:
def get_sha_hash(path, blocksize=65536):
sha_hasher = hashlib.sha256()
with open(path, 'rb') as f:
buffer = f.read(blocksize)
while len(buffer) > 0:
sha_hasher.update(buffer)
buffer = f.read(blocksize)
return sha_hasher.hexdigest()
files = [
'validation_marker.csv',
'renewable_power_plants.sqlite', 'renewable_power_plants.xlsx',
]
for country in countries_including_dirty:
files.append(table_names[country]+'.csv')
files.append('renewable_capacity_timeseries.csv')
files.append('renewable_power_plants_EU.csv')
with open('checksums.txt', 'w') as f:
for file_name in sorted(files):
print(file_name)
file_hash = get_sha_hash(os.path.join(package_path, file_name))
f.write('{},{}\n'.format(file_name, file_hash))
print('Done!')