Renewables.ninja PV and Wind Profiles: Processing Notebook
This notebook is part of the Renewables.ninja PV and Wind Profiles Data Package available on Open Power System Data.
%load_ext autoreload
%autoreload 2
import glob
import os
import sqlite3
import hashlib
import shutil
import pandas as pd
import generate_metadata
version = '2017-07-19'
changes = 'Initial release'
##
# Assumes all country data has been downloaded to ./country_downloads
##
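# The parsing below assumes (illustrative, inferred from the split logic
# that follows) one subdirectory per country containing files named like:
#
#     country_downloads/
#         AL/
#             ninja_pv_country_AL_merra-2_corrected.csv
#             ninja_wind_country_AL_current-merra-2_corrected.csv
#
# Splitting the file stem on '_' then yields the model at position 1,
# the ISO country code at position 3 and the dataset at position 4.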
in_dir = './country_downloads/*'
dataframes = []
for d in glob.glob(in_dir):
    for f in glob.glob(d + '/*.csv'):
        # Derive identifiers from the file name; os.path.splitext removes the
        # '.csv' extension safely (str.strip('.csv') strips a character set,
        # not a suffix, and is fragile for names ending in c, s or v)
        run_identifier = os.path.splitext(os.path.basename(f))[0].split('_')
        model = run_identifier[1]
        iso = run_identifier[3]
        dataset = run_identifier[4]
        if model == 'wind':
            run = dataset.replace('-merra-2', '')
            dataset = 'merra-2'
        else:
            run = 'current'
        df = pd.read_csv(f, skiprows=2, index_col=0, parse_dates=True)
        # Wrap the raw columns in additional header levels, innermost first
        df = pd.concat([df], keys=[run], names=['scenario'], axis=1)
        df = pd.concat([df], keys=[dataset], names=['dataset'], axis=1)
        df = pd.concat([df], keys=[model], names=['tech'], axis=1)
        df = pd.concat([df], keys=[iso], names=['region'], axis=1)
        dataframes.append(df)
complete_data = pd.concat(dataframes, axis=1)
complete_data.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 324336 entries, 1980-01-01 00:00:00 to 2016-12-31 23:00:00
Columns: 175 entries, (AL, pv, merra-2, current, national) to (SK, wind, merra-2, current, national)
dtypes: float64(175)
memory usage: 435.5 MB
# Filter MERRA-2 data
idx = pd.IndexSlice
df = complete_data.loc[:, idx[:, :, ['merra-2'], :, :]]
df.columns.names = ['region', 'tech', 'dataset', 'scenario', 'aggregation']
df.columns = df.columns.droplevel(2) # Drop dataset, since we only select MERRA-2 anyway
df.columns = df.columns.swaplevel('scenario', 'aggregation')
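As a quick, illustrative sanity check (the 'AL' PV columns appear in the .info() output above), the remaining column structure can be inspected:
df.columns.names                    # ['region', 'tech', 'aggregation', 'scenario']
df.loc[:, idx['AL', 'pv']].head()   # e.g. all PV columns for Albania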
# See generate_metadata.py for details
generate_metadata.generate_json(df, version, changes)
Data are provided in three different "shapes": a multiindex frame with hierarchical column headers, a singleindex frame with flat, underscore-joined column names, and a stacked (long-format) frame with one value per row.
df_multiindex = df
df_singleindex = df.copy()
# Flatten the four multiindex column levels into single
# underscore-separated column names
df_singleindex.columns = [
    '_'.join(col) for col in df.columns.values
]
df_stacked = df.copy()
df_stacked = df_stacked.transpose().stack(dropna=True).to_frame(name='data')
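For orientation, an illustrative comparison of the three shapes (same data, three layouts):
df_multiindex.shape    # (timestamps, columns) with four header levels
df_singleindex.shape   # same shape, flat underscore-joined column names
df_stacked.shape       # one row per (region, tech, aggregation, scenario, time)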
The SQLite format is required for the filtering function on the OPSD website. Writing it takes about 30 seconds to complete.
df = df_singleindex.copy()
df.index = df.index.strftime('%Y-%m-%dT%H:%M:%SZ')
filepath = os.path.join(version, 'ninja_pv_wind_profiles.sqlite')
df.to_sql('ninja_pv_wind_profiles_singleindex', sqlite3.connect(filepath),
          if_exists='replace', index_label='time')
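An optional read-back check (not part of the original pipeline) confirms the table was written:
# Optional: read a few rows back from the freshly written table
con = sqlite3.connect(filepath)
pd.read_sql('SELECT * FROM ninja_pv_wind_profiles_singleindex LIMIT 3',
            con, index_col='time')
con.close()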
Writing the full tables to Excel takes a very long time. As a workaround, only the first 5 rows are exported. The rest of the data can then be inserted manually from the _multiindex.csv files.
# filepath = os.path.join(version, 'ninja_pv_wind_profiles.xlsx')
# writer = pd.ExcelWriter(filepath)
# df.head().to_excel(writer, 'ninja_pv_wind_profiles',
#                    float_format='%.2f', merge_cells=True)
# writer.save()
def save_df(df, stacking_key):
    """Save a dataframe as CSV with ISO 8601 timestamps in the version directory."""
    filepath = os.path.join(version, 'ninja_pv_wind_profiles_' + stacking_key + '.csv')
    df.to_csv(filepath, float_format='%.4f',
              date_format='%Y-%m-%dT%H:%M:%SZ')
save_df(df_multiindex, 'multiindex')
save_df(df_singleindex, 'singleindex')
# save_df(df_stacked, 'stacked') # Disabled - file too large
We publish SHA-256 checksums for the output files on GitHub, so that their integrity on the OPSD server can be verified.
def get_sha_hash(path, blocksize=65536):
    """Return the SHA-256 hex digest of the file at path, read in blocks."""
    sha_hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        buffer = f.read(blocksize)
        while len(buffer) > 0:
            sha_hasher.update(buffer)
            buffer = f.read(blocksize)
    return sha_hasher.hexdigest()
filepath = os.path.join(version, 'checksums.txt')
files = os.listdir(version)
# Create checksums.txt in the version directory
with open(filepath, 'w') as f:
    for file_name in files:
        file_hash = get_sha_hash(os.path.join(version, file_name))
        f.write('{},{}\n'.format(file_name, file_hash))
# Copy the file to the root directory, from where it will be pushed to GitHub
shutil.copyfile(filepath, 'checksums.txt')
'checksums.txt'
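To illustrate how these checksums can be used downstream (a hypothetical consumer-side check, not part of this notebook's pipeline), a file can be re-hashed and compared against checksums.txt:
# Hypothetical verification sketch: re-hash one output file and compare
# it against the entry published in checksums.txt
with open('checksums.txt') as f:
    expected = dict(line.strip().split(',') for line in f if line.strip())
name = 'ninja_pv_wind_profiles_singleindex.csv'  # written by save_df above
assert get_sha_hash(os.path.join(version, name)) == expected[name]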