from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last"
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from Index_Calculations import facility_index_gen, index_and_generation
import os
import glob
import numpy as np
from statsmodels.tsa.tsatools import detrend
These definitions are taken from Pétron (2008).
Check actual generation from facilities in 2015 against the state-aggregation method
states_in_NERC = {'WECC': ['WA', 'OR', 'CA', 'NV', 'AZ', 'UT',
'ID', 'MT', 'WY', 'CO', 'NM'],
'MRO': ['ND', 'SD', 'NE', 'MN', 'IA', 'WI'],
'SPP': ['KS', 'OK'],
'ERCOT': ['TX'],
'SERC': ['MO', 'IL', 'AR', 'LA', 'MS', 'AL',
'GA', 'SC', 'NC', 'TN'],
'FRCC': ['FL'],
'RFC': ['MI', 'IN', 'OH', 'PA', 'WV', 'KY',
'VA', 'NJ', 'DE', 'MD'],
'NPCC': ['NY', 'VT', 'ME', 'NH', 'MA', 'RI', 'CT']}
for nerc, states in states_in_NERC.iteritems():
paths = [os.path.join('Data storage',
'state gen data',
state + ' fuels gen.csv') for state in states]
nerc_list = [pd.read_csv(path) for path in paths]
nerc_df = pd.concat(nerc_list)
nerc_df.reset_index(inplace=True, drop=True)
path_out = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
nerc_df.to_csv(path_out, index=False)
facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
epa_path = os.path.join('Data storage', 'Monthly EPA emissions 2017-05-25.csv')
ef_path = os.path.join('Data storage', 'Final emission factors.csv')
out_folder = os.path.join('Data storage', 'final NERC data from states')
NERC_path = os.path.join('Data storage', 'Facility NERC labels.csv')
for nerc in states_in_NERC.keys():
all_fuel_path = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
index_and_generation(facility_path=facility_path,
all_fuel_path = all_fuel_path,
epa_path=epa_path,
emission_factor_path=ef_path,
export_folder=out_folder, export_path_ext=' '
+ nerc, state=states_in_NERC[nerc])
path = os.path.join('Data storage', 'final NERC data from states', 'Monthly index*')
mi_fns = glob.glob(path)
path = os.path.join('Data storage', 'final NERC data from states', 'Monthly gen*')
mg_fns = glob.glob(path)
df_list = []
for f in mi_fns:
region = f.split()[-1][:-4]
df = pd.read_csv(f)
df['region'] = region
df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])
df_list = []
for f in mg_fns:
region = f.split()[-1][:-4]
df = pd.read_csv(f)
df['region'] = region
df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])
monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'],
values='generation (MWh)', columns='fuel category 1')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
monthly_gen.replace(np.nan, 0, inplace=True)
gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'region', 'monthly index (g/kWh)']],
on=['datetime', 'region'])
gen_index.head()
region | datetime | Coal | Hydro | Natural Gas | Nuclear | Other | Other Renewables | Solar | Wind | Year | monthly index (g/kWh) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ERCOT | 2001-01-01 | 11683911.0 | 138093.0 | 13750546.0 | 3545310.0 | 1707467.21 | 92021.80 | 0.0 | 83931.0 | 2001 | 638.408119 |
1 | ERCOT | 2001-02-01 | 10236786.0 | 110148.0 | 11507834.0 | 3037626.0 | 510769.63 | 81710.37 | 0.0 | 141647.0 | 2001 | 639.497007 |
2 | ERCOT | 2001-03-01 | 11004470.0 | 180140.0 | 13316335.0 | 2462837.0 | 447733.81 | 81192.19 | 0.0 | 87631.0 | 2001 | 654.137303 |
3 | ERCOT | 2001-04-01 | 9767225.0 | 124232.0 | 14402417.0 | 2668816.0 | 331369.06 | 76768.94 | 0.0 | 115487.0 | 2001 | 633.521538 |
4 | ERCOT | 2001-05-01 | 11449397.0 | 115102.0 | 16025878.0 | 3419870.0 | 383202.65 | 86697.35 | 0.0 | 103312.0 | 2001 | 638.333358 |
for region in gen_index['region'].unique():
gen_index.loc[gen_index['region'] == region, 'Index variability'] = \
gen_index.loc[gen_index['region']==region,
'monthly index (g/kWh)'].rolling(window=12).std()
gen_index.loc[gen_index['region'] == region,
'Normalized Index variability'] = \
gen_index.loc[gen_index['region']==region, 'Index variability'] / \
gen_index.loc[gen_index['region']==region,
'monthly index (g/kWh)'].rolling(window=12).mean()
base_year = 2005
fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other',
'Solar', 'Wind', 'Hydro']
# fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
# New columns that are being added
col_percent = 'percent ' + fuel
col_change = 'change in ' + fuel
# Calculate percent of generation from each fuel type
gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']
# Percent of fuel in region in base year (entire year)
for region in gen_index['region'].unique():
percent_fuel_base = gen_index.loc[(gen_index['Year'] == base_year) &
(gen_index['region'] == region), fuel].sum() / gen_index.loc[(gen_index['Year'] == base_year) &
(gen_index['region'] == region), 'Total gen'].sum()
# Use percent of fuel in base year to calculate change for each region/month
gen_index.loc[gen_index['region'] == region,
col_change] = (gen_index.loc[gen_index['region'] == region, col_percent] - percent_fuel_base) / percent_fuel_base
# Change in variability compared to average base year value
for region in gen_index['region'].unique():
norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) &
(gen_index['region'] == region), 'Normalized Index variability'].mean()
variability_base = gen_index.loc[(gen_index['Year'] == base_year) &
(gen_index['region'] == region), 'Index variability'].mean()
gen_index.loc[gen_index['region'] == region,
'change in variability'] = (gen_index.loc[gen_index['region'] == region,
'Index variability'] - variability_base) / variability_base
gen_index.loc[gen_index['region'] == region,
'change in norm variability'] = (gen_index.loc[gen_index['region'] == region,
'Normalized Index variability'] - norm_variability_base) / norm_variability_base
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.plotting import *
from bokeh.layouts import gridplot, column
from bokeh.models import HoverTool, Div
from bokeh.palettes import viridis
from bokeh.charts import TimeSeries
from bokeh.io import export_svgs
/Users/Home/anaconda/lib/python2.7/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: The bokeh.charts API has moved to a separate 'bkcharts' package. This compatibility shim will remain until Bokeh 1.0 is released. After that, if you want to use this API you will have to install the bkcharts package explicitly. warn(message)
def weighted_percent(df, fuel, year):
all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
'Wind', 'Solar', 'Hydro']
temp = df.loc[df['Year'] == year, all_fuels]
temp['Total'] = temp.sum(axis=1)
weighted_per = temp[fuel].sum() / temp['Total'].sum() * 100
return weighted_per
gen_index.columns
Index([u'region', u'datetime', u'Coal', u'Hydro', u'Natural Gas', u'Nuclear', u'Other', u'Other Renewables', u'Solar', u'Wind', u'Year', u'monthly index (g/kWh)', u'Index variability', u'Normalized Index variability', u'Total gen', u'percent Coal', u'change in Coal', u'percent Natural Gas', u'change in Natural Gas', u'percent Other Renewables', u'change in Other Renewables', u'percent Nuclear', u'change in Nuclear', u'percent Other', u'change in Other', u'percent Solar', u'change in Solar', u'percent Wind', u'change in Wind', u'percent Hydro', u'change in Hydro', u'change in variability', u'change in norm variability'], dtype='object')
At the national level, Index variability went up after 2007, and then dipped back down briefly in 2013. This trend can be seen in a few, but not all, of the regions.
I'm not sure what the cyclical trends in Index variability represent. These values are 12 month rolling standard deviations - they shouldn't be affected by seasonal variation. Update: Upticks in the variability or normalized variability could be due to one or more months where the Index value jumped up or down well outside the normal range. This would increase the rolling standard deviation for 12 months.
The normalized variability here is a 12 month rolling standard deviation of CO2 intensity divided by the 12 month rolling average. I'm suspicious of the results in MRO.
sns.set_style('white', {'axes.linewidth': 1.5,
'axes.grid': True})
sns.set_context('notebook', font_scale=1.2)
def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None,
suptitle='', add_legend=False, ax_labels=None,
FG_kwargs={}, plot_kwargs={}):
g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs)
g.map(plot_function, x_axis, y_axis, **plot_kwargs)
g.set_xticklabels(rotation=35)
if add_legend:
g.add_legend()
if suptitle:
plt.suptitle(suptitle, y=1.02, size=15)
if col_order and 'col' in FG_kwargs:
axes = g.axes.flatten()
for ax, title in zip(axes, order):
ax.set_title(title)
if ax_labels:
g.set_axis_labels(ax_labels)
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(col='region',
hue='region',
col_wrap=3,
aspect=1.2,
hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
y_axis='Index variability', col_order=order,
suptitle='Index Variability', FG_kwargs=FG_kwargs)
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(col='region',
hue='region',
col_wrap=3,
aspect=1.2,
hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
y_axis='Normalized Index variability', col_order=order,
suptitle='Normalized Index Variability', FG_kwargs=FG_kwargs)
Figure 1 of paper - also include national
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(col='region',
hue='region',
col_wrap=3,
aspect=1.2,
hue_order=order,
ylim=(0, 1050))
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index', FG_kwargs=FG_kwargs)
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(0, 1050))
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index', FG_kwargs=FG_kwargs)
Trying to show why detrending is needed when using the correlation to determine seasonal differences between regions.
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(200, 1050))
corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x114a944d0>
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(200, 1050))
corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x11d00b450>
Trying both a linear detrend and subtracting the previous month's value.
I'm not sure if I like the method of subtracting the previous month's value. Need to see if it is retaining the seasonal behavior.
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(-300, 300))
detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x11c9d4f90>
def shift_detrend(series, n):
'Shift a series by n periods to detrend'
detrended = series - series.shift(n)
return detrended
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(-300, 300))
detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
'monthly index (g/kWh)'], 12)
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x11a5dee90>
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(-300, 300))
detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x11c9792d0>
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order,
ylim=(-300, 300))
detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
detrend_df.loc[detrend_df['region'] == col,
'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
'monthly index (g/kWh)'], 12)
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
<matplotlib.text.Text at 0x11ae1a650>
Plot all regions detrended
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
col='region',
col_wrap=3,
aspect=1.2,
hue_order=order,
ylim=(-300, 300))
detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
add_legend=True, y_axis='monthly index (g/kWh)', col_order=order,
suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)
Because each region is decreasing over time, need to detrend the data before finding the correlation matrix. Using the statsmodel detrend function, with an assumption that the trends are linear (order 1).
nerc_index = gen_index.pivot_table(values='monthly index (g/kWh)',
index='datetime', columns='region')
nerc_index_detrend = pd.DataFrame(index=nerc_index.index)
for col in nerc_index.columns:
nerc_index_detrend.loc[:, col] = detrend(nerc_index.loc[:, col])
sns.heatmap(nerc_index_detrend.corr(), square=True, cmap='magma_r')
<matplotlib.axes._subplots.AxesSubplot at 0x14a5338d0>
nerc_gen = gen_index.pivot(index='datetime', columns='region', values='Total gen')
regions = nerc_gen.columns
nerc_gen['Total'] = nerc_gen.sum(axis=1)
percent_nerc_gen = nerc_gen.iloc[:, :-1].divide(nerc_gen.iloc[:, -1], axis=0)
percent_nerc_gen.reset_index(inplace=True)
percent_nerc_gen = pd.melt(percent_nerc_gen, id_vars='datetime',
value_name='Percent Generation')
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
aspect=1.5,
size=5,
hue_order=order)
region_facet_grid(df=percent_nerc_gen, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='Percent Generation',
suptitle='Percent generation from each NERC region', FG_kwargs=FG_kwargs)
nerc_gen['Year'] = nerc_gen.index.year
nerc_gen.groupby('Year').sum()
region | ERCOT | FRCC | MRO | NPCC | RFC | SERC | SPP | WECC | Total |
---|---|---|---|---|---|---|---|---|---|
Year | |||||||||
2001 | 3.725800e+08 | 1.909453e+08 | 2.161632e+08 | 2.605056e+08 | 9.399041e+08 | 9.938657e+08 | 9.999798e+07 | 6.451816e+08 | 3.719144e+09 |
2002 | 3.856285e+08 | 2.033528e+08 | 2.243845e+08 | 2.639318e+08 | 9.726140e+08 | 1.031267e+09 | 1.063719e+08 | 6.522096e+08 | 3.839760e+09 |
2003 | 3.791997e+08 | 2.126100e+08 | 2.270116e+08 | 2.677912e+08 | 9.679986e+08 | 1.036856e+09 | 1.071944e+08 | 6.671349e+08 | 3.865796e+09 |
2004 | 3.902991e+08 | 2.181179e+08 | 2.255123e+08 | 2.714832e+08 | 9.882323e+08 | 1.059385e+09 | 1.075122e+08 | 6.920395e+08 | 3.952582e+09 |
2005 | 3.966687e+08 | 2.202564e+08 | 2.289179e+08 | 2.830366e+08 | 1.018798e+09 | 1.074424e+09 | 1.144705e+08 | 7.005246e+08 | 4.037097e+09 |
2006 | 4.005829e+08 | 2.237516e+08 | 2.300444e+08 | 2.744769e+08 | 9.998087e+08 | 1.070732e+09 | 1.161386e+08 | 7.308524e+08 | 4.046387e+09 |
2007 | 4.054923e+08 | 2.254161e+08 | 2.374609e+08 | 2.784050e+08 | 1.022114e+09 | 1.106244e+09 | 1.229413e+08 | 7.402408e+08 | 4.138315e+09 |
2008 | 4.047878e+08 | 2.196368e+08 | 2.435205e+08 | 2.674164e+08 | 1.000488e+09 | 1.085137e+09 | 1.229592e+08 | 7.572186e+08 | 4.101164e+09 |
2009 | 3.971679e+08 | 2.179523e+08 | 2.407059e+08 | 2.548166e+08 | 9.153819e+08 | 1.049575e+09 | 1.217441e+08 | 7.352388e+08 | 3.932583e+09 |
2010 | 4.116950e+08 | 2.290959e+08 | 2.569122e+08 | 2.666871e+08 | 9.769732e+08 | 1.116945e+09 | 1.201745e+08 | 7.287731e+08 | 4.107256e+09 |
2011 | 4.354574e+08 | 2.218946e+08 | 2.559552e+08 | 2.608021e+08 | 9.513253e+08 | 1.096300e+09 | 1.199656e+08 | 7.406095e+08 | 4.082310e+09 |
2012 | 4.298125e+08 | 2.210961e+08 | 2.549966e+08 | 2.566524e+08 | 9.218282e+08 | 1.078700e+09 | 1.223212e+08 | 7.447823e+08 | 4.030189e+09 |
2013 | 4.333802e+08 | 2.223989e+08 | 2.561657e+08 | 2.515223e+08 | 9.307387e+08 | 1.082139e+09 | 1.221463e+08 | 7.505837e+08 | 4.049075e+09 |
2014 | 4.377934e+08 | 2.301349e+08 | 2.618739e+08 | 2.491858e+08 | 9.424851e+08 | 1.091380e+09 | 1.198891e+08 | 7.551188e+08 | 4.087861e+09 |
2015 | 4.500498e+08 | 2.375687e+08 | 2.667626e+08 | 2.506158e+08 | 9.148136e+08 | 1.087641e+09 | 1.216704e+08 | 7.453607e+08 | 4.074483e+09 |
2016 | 4.558160e+08 | 2.381459e+08 | 2.651052e+08 | 2.443398e+08 | 9.231032e+08 | 1.080953e+09 | 1.258235e+08 | 7.460478e+08 | 4.079335e+09 |
2017 | 9.915130e+07 | 5.071177e+07 | 6.622780e+07 | 5.712312e+07 | 2.159938e+08 | 2.473217e+08 | 2.929221e+07 | 1.810598e+08 | 9.468813e+08 |
all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
'Wind', 'Solar', 'Hydro']
value_cols = ['percent {}'.format(fuel) for fuel in all_fuels]
percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'],
value_vars=value_cols, value_name='percent generation',
var_name='fuel')
percent_gen_df['fuel'] = percent_gen_df['fuel'].map(lambda x: x.split()[-1])
percent_gen_df['fuel'].replace('Renewables', 'Other Renewables', inplace=True)
percent_gen_df['fuel'].replace('Gas', 'Natural Gas', inplace=True)
percent_gen_df.head()
region | datetime | fuel | percent generation | |
---|---|---|---|---|
0 | ERCOT | 2001-01-01 | Coal | 0.376885 |
1 | ERCOT | 2001-02-01 | Coal | 0.399461 |
2 | ERCOT | 2001-03-01 | Coal | 0.398997 |
3 | ERCOT | 2001-04-01 | Coal | 0.355349 |
4 | ERCOT | 2001-05-01 | Coal | 0.362512 |
** This could be figure 2?**
percent_gen_df['fuel'].unique()
array(['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables', 'Wind', 'Solar', 'Hydro'], dtype=object)
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar',
'Other', 'Other Renewables']
FG_kwargs = dict(hue='fuel',
col='region',
col_wrap=3,
aspect=1.2,
hue_order=fuel_order)
region_facet_grid(df=percent_gen_df, plot_function=plt.plot, x_axis='datetime',
add_legend=True, y_axis='percent generation', col_order=order,
suptitle='', FG_kwargs=FG_kwargs)
def region_bokeh_plot(df, fuel, x_axis, y_axis, x_range=None, y_range=None,
x_lower_lim=0, x_upper_lim=None):
figs = {}
if not x_upper_lim:
x_upper_lim = df[x_axis].max()
for region in df['region'].unique():
temp = gen_index.loc[gen_index['region'] == region]
max_change = temp[x_axis].max()
if max_change <= x_upper_lim and max_change >= x_lower_lim:
source = ColumnDataSource(data=dict(
x=temp[x_axis],
y=temp[y_axis],
datetime=temp['datetime'],
colors= viridis(len(temp))
))
hover = HoverTool(tooltips=[
("(x,y)", "($x, $y)"),
("datetime", "@datetime{%F}")],
formatters={
'datetime': 'datetime', # use 'datetime' formatter
})
hover.point_policy = "snap_to_data"
fuel_2005 = weighted_percent(temp, fuel, 2005)
fuel_2016 = weighted_percent(temp, fuel, 2016)
title = region + ' {:.1f}% to {:.1f}% {}'.format(fuel_2005,
fuel_2016, fuel)
figs[region] = figure(title=title, tools=[hover], y_range=y_range, x_range=x_range,
output_backend="webgl")
figs[region].circle('x', 'y', source=source, color='colors', size=12, alpha=0.5)
figs[region].xaxis.axis_label = x_axis
figs[region].yaxis.axis_label = y_axis
plots = figs.values()
grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)
show(grid)
figs = {}
for region in percent_gen_df['region'].unique():
temp = percent_gen_df.loc[percent_gen_df['region'] == region]
pivoted = pd.pivot_table(temp, index='datetime', columns='fuel')
pivoted.reset_index(inplace=True, drop=False)
source = ColumnDataSource(data=dict(
x=pivoted['datetime'],
y=pivoted['percent generation'],
datetime=pivoted['datetime']
))
# p = TimeSeries(pivoted, tools='hover')
hover = HoverTool(tooltips=[
("fuel", "@fuel"),
('% gen', '$y'),
("datetime", "@datetime{%F}")],
formatters={
'datetime': 'datetime', # use 'datetime' formatter
})
hover.point_policy = "snap_to_data"
hover.mode = 'vline'
figs[region] = figure(title=region, tools=[hover],
output_backend="webgl")
figs[region].multi_line('x', 'y', source=source)
figs[region].xaxis.axis_label = 'datetime'
figs[region].yaxis.axis_label = 'percent generation'
plots = figs.values()
grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)
show(grid)
Temporal variability is a sign that decarbonization is happening, but unevenly across the year. If the variability is not correlated between regions, better transmission would allow for a more even spread of low-carbon electricity. As the percent of those technologies grows, each region could export excess low(er)-carbon power. But if they are negatively correlated, then each region is going to be generating higher-CO2 power at the same time. New technologies will be necessary to deal with this.
Ines: Is there a point where variability becomes of return?
Paper results subsections (order could change - method will be in the same order, short paragraphs that explain each):
Plot ideas (figures 3, 4, and 5):
For results:
This set of plots - and the ones repeated below - show:
The cleanest relationship that I see is that as coal goes down, variability goes up. The trend is pretty linear, and extends across a bunch of different NERC regions. A few interesting notes:
region_bokeh_plot(gen_index, 'Coal', 'percent Coal',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.85))
region_bokeh_plot(gen_index, 'Coal', 'change in Coal',
'change in norm variability', y_range=(-1, 7))
region_bokeh_plot(gen_index, 'Wind', 'percent Wind',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.35))
region_bokeh_plot(gen_index, 'Wind', 'change in Wind',
'change in norm variability', y_range=(-1, 7))
region_bokeh_plot(gen_index, 'Natural Gas', 'percent Natural Gas',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.75))
region_bokeh_plot(gen_index, 'Natural Gas', 'change in Natural Gas',
'change in norm variability', y_range=(-1, 7))
region_bokeh_plot(gen_index, 'Hydro', 'percent Hydro',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.45))
region_bokeh_plot(gen_index, 'Hydro', 'change in Hydro',
'change in norm variability', y_range=(-1, 7))
region_bokeh_plot(gen_index, 'Solar', 'percent Solar',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.08))
Most NERC regions didn't have recorded solar in 2005, so this plot doesn't make much sense
region_bokeh_plot(gen_index, 'Solar', 'change in Solar',
'change in norm variability', y_range=(-1, 7))
region_bokeh_plot(gen_index, 'Nuclear', 'percent Nuclear',
'change in norm variability',
y_range=(-1, 7), x_range=(0, 0.35))
region_bokeh_plot(gen_index, 'Solar', 'change in Solar',
'change in norm variability', y_range=(-1, 7))
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
"""Draw scatterplot with point colors from a faceted DataFrame columns."""
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)
g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Natural Gas',
'Normalized Index variability', alpha=0.5, s=100,
vmin=vmin, vmax=vmax, cmap=cmap)
# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)
# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)
# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')
<seaborn.axisgrid.FacetGrid at 0x150bab710>
<matplotlib.text.Text at 0x151356a10>
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
"""Draw scatterplot with point colors from a faceted DataFrame columns."""
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)
g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Wind',
'Normalized Index variability', alpha=0.5, s=100,
vmin=vmin, vmax=vmax, cmap=cmap)
# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)
# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)
# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')
<seaborn.axisgrid.FacetGrid at 0x13d980c90>
<matplotlib.text.Text at 0x13aef74d0>
import ternary
points = gen_index
figure, tax = ternary.figure(scale=1)
tax.scatter()
color_col = 'Normalized Index variability'
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
'WECC', 'NPCC']
gen_index['points'] = gen_index[['percent Coal', 'percent Natural Gas', 'percent Wind']].apply(tuple, axis=1)
def ternary_facet_scatter(points, c, **kwargs):
"""Draw scatterplot with point colors from a faceted DataFrame columns."""
kwargs.pop("color")
scale = 1
ax = plt.gca()
figure, tax = ternary.figure(ax=ax, scale=scale)
tax.boundary(linewidth = 1.0)
# tax.gridlines(multiple=10,color="blue")
tax.scatter(points, c=c, **kwargs)
tax.left_axis_label("Percent Wind")
tax.right_axis_label("Percent Natural Gas")
tax.bottom_axis_label("Percent Coal")
tax.gridlines(multiple=.25, color='k', linestyle='-')
tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
tax.clear_matplotlib_ticks()
# plt.scatter(x, y, c=c, **kwargs)
vmin = gen_index[color_col].min()
vmax = gen_index[color_col].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)
with sns.axes_style('white', {'axes.linewidth': 0,
'axes.grid': False}):
g = sns.FacetGrid(gen_index, col='region', col_wrap=3,
col_order=order, palette='viridis')
g.map(ternary_facet_scatter, 'points',
color_col, alpha=0.5, s=100, cmap=cmap,
vmin=vmin, vmax=vmax)
# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)
# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], cmap=cmap, vmin=vmin, vmax=vmax)
# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')
axes = g.axes.flatten()
for ax, title in zip(axes, order):
ax.set_title(title)
ax.set_xlabel('')
ax.set_ylabel('')
<seaborn.axisgrid.FacetGrid at 0x14d450990>
<matplotlib.text.Text at 0x1503e67d0>
<matplotlib.text.Text at 0x14f7e8490>
<matplotlib.text.Text at 0x14f5a7090>
<matplotlib.text.Text at 0x14f791490>
<matplotlib.text.Text at 0x14f728610>
<matplotlib.text.Text at 0x14f6f6a50>
<matplotlib.text.Text at 0x14f70d090>
<matplotlib.text.Text at 0x14f77e710>
<matplotlib.text.Text at 0x14f9fc850>
<matplotlib.text.Text at 0x14f76de50>
<matplotlib.text.Text at 0x14fa3d210>
<matplotlib.text.Text at 0x14fa28450>
<matplotlib.text.Text at 0x14fa3d310>
<matplotlib.text.Text at 0x14fbbbb50>
<matplotlib.text.Text at 0x14fb99690>
<matplotlib.text.Text at 0x14fa28f90>
<matplotlib.text.Text at 0x14fc2c390>
<matplotlib.text.Text at 0x14fbffcd0>
<matplotlib.text.Text at 0x14fc16b90>
<matplotlib.text.Text at 0x14a1e7350>
<matplotlib.text.Text at 0x14fc86ad0>
<matplotlib.text.Text at 0x14fc9c990>
<matplotlib.text.Text at 0x14eb1f7d0>
<matplotlib.text.Text at 0x14ec62110>
<matplotlib.text.Text at 0x14ea27290>
df = gen_index.loc[gen_index['region'] == 'FRCC']
points = df['points']
c = df['Normalized Index variability']
scale = 1
# ax = plt.gca()
figure, tax = ternary.figure(scale=scale)
tax.boundary(linewidth = 1.0)
# tax.gridlines(multiple=10,color="blue")
tax.scatter(points, c=c)
tax.left_axis_label("Percent Wind")
tax.right_axis_label("Percent Natural Gas")
tax.bottom_axis_label("Percent Coal")
tax.gridlines(multiple=.25, color='k', linestyle='-')
tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
tax.clear_matplotlib_ticks()
tax.show()
<matplotlib.axes._subplots.AxesSubplot at 0x150a3ad90>