In [1]:

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last"

In [2]:

%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from Index_Calculations import facility_index_gen, index_and_generation
import os
import glob
import numpy as np
from statsmodels.tsa.tsatools import detrend

Group state-level total gen into approx NERC regions¶

These definitions are taken from Pétron (2008).

Check actual generation from facilities in 2015 against the state-aggregation method

In [2]:

states_in_NERC = {'WECC': ['WA', 'OR', 'CA', 'NV', 'AZ', 'UT',
                           'ID', 'MT', 'WY', 'CO', 'NM'],
                  'MRO': ['ND', 'SD', 'NE', 'MN', 'IA', 'WI'],
                  'SPP': ['KS', 'OK'],
                  'ERCOT': ['TX'],
                  'SERC': ['MO', 'IL', 'AR', 'LA', 'MS', 'AL',
                           'GA', 'SC', 'NC', 'TN'],
                  'FRCC': ['FL'],
                  'RFC': ['MI', 'IN', 'OH', 'PA', 'WV', 'KY',
                          'VA', 'NJ', 'DE', 'MD'],
                  'NPCC': ['NY', 'VT', 'ME', 'NH', 'MA', 'RI', 'CT']}

In [24]:

for nerc, states in states_in_NERC.iteritems():
    paths = [os.path.join('Data storage', 
                          'state gen data', 
                          state + ' fuels gen.csv') for state in states]
    nerc_list = [pd.read_csv(path) for path in paths]
    nerc_df = pd.concat(nerc_list)
    nerc_df.reset_index(inplace=True, drop=True)
    
    path_out = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
    nerc_df.to_csv(path_out, index=False)

Calculate index values for each NERC region¶

In [25]:

facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
epa_path = os.path.join('Data storage', 'Monthly EPA emissions 2017-05-25.csv')
ef_path = os.path.join('Data storage', 'Final emission factors.csv')
out_folder = os.path.join('Data storage', 'final NERC data from states')
NERC_path = os.path.join('Data storage', 'Facility NERC labels.csv')

for nerc in states_in_NERC.keys():
    all_fuel_path = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
    index_and_generation(facility_path=facility_path,
                         all_fuel_path = all_fuel_path,
                         epa_path=epa_path,
                         emission_factor_path=ef_path,
                         export_folder=out_folder, export_path_ext=' '
                         + nerc, state=states_in_NERC[nerc])

Read data and create figures¶

In [3]:

path = os.path.join('Data storage', 'final NERC data from states', 'Monthly index*')
mi_fns = glob.glob(path)
path = os.path.join('Data storage', 'final NERC data from states', 'Monthly gen*')
mg_fns = glob.glob(path)

In [4]:

df_list = []
for f in mi_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])

In [5]:

df_list = []
for f in mg_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'], 
                             values='generation (MWh)', columns='fuel category 1')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
monthly_gen.replace(np.nan, 0, inplace=True)

In [6]:

gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'region', 'monthly index (g/kWh)']], 
                     on=['datetime', 'region'])

In [7]:

gen_index.head()

Out[7]:

	region	datetime	Coal	Hydro	Natural Gas	Nuclear	Other	Other Renewables	Wind	Year	monthly index (g/kWh)
0	ERCOT	2001-01-01	11683911.0	138093.0	13750546.0	3545310.0	1707467.21	92021.80	83931.0	2001	638.408119
1	ERCOT	2001-02-01	10236786.0	110148.0	11507834.0	3037626.0	510769.63	81710.37	141647.0	2001	639.497007
2	ERCOT	2001-03-01	11004470.0	180140.0	13316335.0	2462837.0	447733.81	81192.19	87631.0	2001	654.137303
3	ERCOT	2001-04-01	9767225.0	124232.0	14402417.0	2668816.0	331369.06	76768.94	115487.0	2001	633.521538
4	ERCOT	2001-05-01	11449397.0	115102.0	16025878.0	3419870.0	383202.65	86697.35	103312.0	2001	638.333358

In [7]:

for region in gen_index['region'].unique():
    gen_index.loc[gen_index['region'] == region, 'Index variability'] = \
        gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).std()
    
    gen_index.loc[gen_index['region'] == region, 
                   'Normalized Index variability'] = \
         gen_index.loc[gen_index['region']==region, 'Index variability'] / \
         gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()

In [8]:

base_year = 2005

In [9]:

fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other',
         'Solar', 'Wind', 'Hydro']
# fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Calculate percent of generation from each fuel type
    gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']

    # Percent of fuel in region in base year (entire year)
    for region in gen_index['region'].unique():
        percent_fuel_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                          (gen_index['region'] == region), fuel].sum() / gen_index.loc[(gen_index['Year'] == base_year) & 
                                                                                                     (gen_index['region'] == region), 'Total gen'].sum()

        # Use percent of fuel in base year to calculate change for each region/month
        gen_index.loc[gen_index['region'] == region, 
                      col_change] = (gen_index.loc[gen_index['region'] == region, col_percent] - percent_fuel_base) / percent_fuel_base
    
# Change in variability compared to average base year value
for region in gen_index['region'].unique():
    norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Normalized Index variability'].mean()
    variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Index variability'].mean()
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in variability'] = (gen_index.loc[gen_index['region'] == region, 
                                                            'Index variability'] - variability_base) / variability_base
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in norm variability'] = (gen_index.loc[gen_index['region'] == region, 
                                                            'Normalized Index variability'] - norm_variability_base) / norm_variability_base

In [18]:

from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.plotting import *
from bokeh.layouts import gridplot, column
from bokeh.models import HoverTool, Div
from bokeh.palettes import viridis
from bokeh.charts import TimeSeries
from bokeh.io import export_svgs

/Users/Home/anaconda/lib/python2.7/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: 
The bokeh.charts API has moved to a separate 'bkcharts' package.

This compatibility shim will remain until Bokeh 1.0 is released.
After that, if you want to use this API you will have to install
the bkcharts package explicitly.

  warn(message)

In [19]:

output_notebook()

Loading BokehJS ...

In [20]:

def weighted_percent(df, fuel, year):
    all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
    temp = df.loc[df['Year'] == year, all_fuels]
    temp['Total'] = temp.sum(axis=1)
    
    weighted_per = temp[fuel].sum() / temp['Total'].sum() * 100
    return weighted_per

In [13]:

gen_index.columns

Out[13]:

Index([u'region', u'datetime', u'Coal', u'Hydro', u'Natural Gas', u'Nuclear',
       u'Other', u'Other Renewables', u'Solar', u'Wind', u'Year',
       u'monthly index (g/kWh)', u'Index variability',
       u'Normalized Index variability', u'Total gen', u'percent Coal',
       u'change in Coal', u'percent Natural Gas', u'change in Natural Gas',
       u'percent Other Renewables', u'change in Other Renewables',
       u'percent Nuclear', u'change in Nuclear', u'percent Other',
       u'change in Other', u'percent Solar', u'change in Solar',
       u'percent Wind', u'change in Wind', u'percent Hydro',
       u'change in Hydro', u'change in variability',
       u'change in norm variability'],
      dtype='object')

Actual and Normalized variability over time in each NERC region¶

At the national level, Index variability went up after 2007, and then dipped back down briefly in 2013. This trend can be seen in a few, but not all, of the regions.

I'm not sure what the cyclical trends in Index variability represent. These values are 12 month rolling standard deviations - they shouldn't be affected by seasonal variation. Update: Upticks in the variability or normalized variability could be due to one or more months where the Index value jumped up or down well outside the normal range. This would increase the rolling standard deviation for 12 months.

The normalized variability here is a 12 month rolling standard deviation of CO2 intensity divided by the 12 month rolling average. I'm suspicious of the results in MRO.

In [12]:

sns.set_style('white', {'axes.linewidth': 1.5,
                        'axes.grid': True})
sns.set_context('notebook', font_scale=1.2)

In [13]:

def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None,
                      suptitle='', add_legend=False, ax_labels=None,
                      FG_kwargs={}, plot_kwargs={}):
    g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs)
    g.map(plot_function, x_axis, y_axis, **plot_kwargs)
    g.set_xticklabels(rotation=35)
    if add_legend:
        g.add_legend()
    if suptitle:
        plt.suptitle(suptitle, y=1.02, size=15)
    if col_order and 'col' in FG_kwargs:
        axes = g.axes.flatten()
        for ax, title in zip(axes, order):
            ax.set_title(title)
    if ax_labels:
        g.set_axis_labels(ax_labels)

In [278]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Index variability', col_order=order, 
                  suptitle='Index Variability', FG_kwargs=FG_kwargs)

In [273]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Normalized Index variability', col_order=order, 
                  suptitle='Normalized Index Variability', FG_kwargs=FG_kwargs)

Actual index values over time in each NERC region¶

Figure 1 of paper - also include national

In [275]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(0, 1050))

region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs)

In [280]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(0, 1050))
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs)

Example of Index correlation between two regions, with and without detrending¶

Trying to show why detrending is needed when using the correlation to determine seasonal differences between regions.

In [38]:

order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[38]:

<matplotlib.text.Text at 0x114a944d0>

In [43]:

order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[43]:

<matplotlib.text.Text at 0x11d00b450>

Correlation of detrended data¶

Trying both a linear detrend and subtracting the previous month's value.

I'm not sure if I like the method of subtracting the previous month's value. Need to see if it is retaining the seasonal behavior.

In [41]:

order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[41]:

<matplotlib.text.Text at 0x11c9d4f90>

In [10]:

def shift_detrend(series, n):
    'Shift a series by n periods to detrend'
    detrended = series - series.shift(n)
    return detrended

In [14]:

order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))


detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                                                        'monthly index (g/kWh)'], 12)
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[14]:

<matplotlib.text.Text at 0x11a5dee90>

In [44]:

order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[44]:

<matplotlib.text.Text at 0x11c9792d0>

In [15]:

order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                           'monthly index (g/kWh)'], 12)
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))

Out[15]:

<matplotlib.text.Text at 0x11ae1a650>

Plot all regions detrended

In [16]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

Correlation in Index across regions over time¶

Because each region is decreasing over time, need to detrend the data before finding the correlation matrix. Using the statsmodel detrend function, with an assumption that the trends are linear (order 1).

In [303]:

nerc_index = gen_index.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region')
nerc_index_detrend = pd.DataFrame(index=nerc_index.index)
for col in nerc_index.columns:
    nerc_index_detrend.loc[:, col] = detrend(nerc_index.loc[:, col])

In [299]:

sns.heatmap(nerc_index_detrend.corr(), square=True, cmap='magma_r')

Out[299]:

<matplotlib.axes._subplots.AxesSubplot at 0x14a5338d0>

Are correlation patterns different in summer vs winter?
Does correlation change over time?

Percent of national generation from each NERC region¶

In [31]:

nerc_gen = gen_index.pivot(index='datetime', columns='region', values='Total gen')
regions = nerc_gen.columns
nerc_gen['Total'] = nerc_gen.sum(axis=1)
percent_nerc_gen = nerc_gen.iloc[:, :-1].divide(nerc_gen.iloc[:, -1], axis=0)

In [32]:

percent_nerc_gen.reset_index(inplace=True)

In [33]:

percent_nerc_gen = pd.melt(percent_nerc_gen, id_vars='datetime',
                           value_name='Percent Generation')

In [281]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order)
region_facet_grid(df=percent_nerc_gen, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='Percent Generation', 
                  suptitle='Percent generation from each NERC region', FG_kwargs=FG_kwargs)

Generation in each NERC region in 2015¶

In [53]:

nerc_gen['Year'] = nerc_gen.index.year
nerc_gen.groupby('Year').sum()

Out[53]:

region	ERCOT	FRCC	MRO	NPCC	RFC	SERC	SPP	WECC	Total
Year
2001	3.725800e+08	1.909453e+08	2.161632e+08	2.605056e+08	9.399041e+08	9.938657e+08	9.999798e+07	6.451816e+08	3.719144e+09
2002	3.856285e+08	2.033528e+08	2.243845e+08	2.639318e+08	9.726140e+08	1.031267e+09	1.063719e+08	6.522096e+08	3.839760e+09
2003	3.791997e+08	2.126100e+08	2.270116e+08	2.677912e+08	9.679986e+08	1.036856e+09	1.071944e+08	6.671349e+08	3.865796e+09
2004	3.902991e+08	2.181179e+08	2.255123e+08	2.714832e+08	9.882323e+08	1.059385e+09	1.075122e+08	6.920395e+08	3.952582e+09
2005	3.966687e+08	2.202564e+08	2.289179e+08	2.830366e+08	1.018798e+09	1.074424e+09	1.144705e+08	7.005246e+08	4.037097e+09
2006	4.005829e+08	2.237516e+08	2.300444e+08	2.744769e+08	9.998087e+08	1.070732e+09	1.161386e+08	7.308524e+08	4.046387e+09
2007	4.054923e+08	2.254161e+08	2.374609e+08	2.784050e+08	1.022114e+09	1.106244e+09	1.229413e+08	7.402408e+08	4.138315e+09
2008	4.047878e+08	2.196368e+08	2.435205e+08	2.674164e+08	1.000488e+09	1.085137e+09	1.229592e+08	7.572186e+08	4.101164e+09
2009	3.971679e+08	2.179523e+08	2.407059e+08	2.548166e+08	9.153819e+08	1.049575e+09	1.217441e+08	7.352388e+08	3.932583e+09
2010	4.116950e+08	2.290959e+08	2.569122e+08	2.666871e+08	9.769732e+08	1.116945e+09	1.201745e+08	7.287731e+08	4.107256e+09
2011	4.354574e+08	2.218946e+08	2.559552e+08	2.608021e+08	9.513253e+08	1.096300e+09	1.199656e+08	7.406095e+08	4.082310e+09
2012	4.298125e+08	2.210961e+08	2.549966e+08	2.566524e+08	9.218282e+08	1.078700e+09	1.223212e+08	7.447823e+08	4.030189e+09
2013	4.333802e+08	2.223989e+08	2.561657e+08	2.515223e+08	9.307387e+08	1.082139e+09	1.221463e+08	7.505837e+08	4.049075e+09
2014	4.377934e+08	2.301349e+08	2.618739e+08	2.491858e+08	9.424851e+08	1.091380e+09	1.198891e+08	7.551188e+08	4.087861e+09
2015	4.500498e+08	2.375687e+08	2.667626e+08	2.506158e+08	9.148136e+08	1.087641e+09	1.216704e+08	7.453607e+08	4.074483e+09
2016	4.558160e+08	2.381459e+08	2.651052e+08	2.443398e+08	9.231032e+08	1.080953e+09	1.258235e+08	7.460478e+08	4.079335e+09
2017	9.915130e+07	5.071177e+07	6.622780e+07	5.712312e+07	2.159938e+08	2.473217e+08	2.929221e+07	1.810598e+08	9.468813e+08

Percent of each fuel used in each NERC region¶

In [107]:

all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
value_cols = ['percent {}'.format(fuel) for fuel in all_fuels]

percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'], 
        value_vars=value_cols, value_name='percent generation',
                        var_name='fuel')
percent_gen_df['fuel'] = percent_gen_df['fuel'].map(lambda x: x.split()[-1])
percent_gen_df['fuel'].replace('Renewables', 'Other Renewables', inplace=True)
percent_gen_df['fuel'].replace('Gas', 'Natural Gas', inplace=True)

In [38]:

percent_gen_df.head()

Out[38]:

	region	datetime	fuel	percent generation
0	ERCOT	2001-01-01	Coal	0.376885
1	ERCOT	2001-02-01	Coal	0.399461
2	ERCOT	2001-03-01	Coal	0.398997
3	ERCOT	2001-04-01	Coal	0.355349
4	ERCOT	2001-05-01	Coal	0.362512

** This could be figure 2?**

In [108]:

percent_gen_df['fuel'].unique()

Out[108]:

array(['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
       'Wind', 'Solar', 'Hydro'], dtype=object)

In [286]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar',
              'Other', 'Other Renewables']
FG_kwargs = dict(hue='fuel',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=fuel_order)
region_facet_grid(df=percent_gen_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='percent generation', col_order=order, 
                  suptitle='', FG_kwargs=FG_kwargs)

In [23]:

def region_bokeh_plot(df, fuel, x_axis, y_axis, x_range=None, y_range=None,
                     x_lower_lim=0, x_upper_lim=None):
    figs = {}
    if not x_upper_lim:
        x_upper_lim = df[x_axis].max()
        
    for region in df['region'].unique():
        temp = gen_index.loc[gen_index['region'] == region]
        max_change = temp[x_axis].max()
        
        if max_change <= x_upper_lim and max_change >= x_lower_lim:
            
            source = ColumnDataSource(data=dict(
            x=temp[x_axis],
            y=temp[y_axis],
            datetime=temp['datetime'],
            colors= viridis(len(temp))
            ))
            
            hover = HoverTool(tooltips=[
                                    ("(x,y)", "($x, $y)"),
                                    ("datetime", "@datetime{%F}")],
                             formatters={
                                     'datetime': 'datetime', # use 'datetime' formatter
                                        })
            hover.point_policy = "snap_to_data"

            fuel_2005 = weighted_percent(temp, fuel, 2005)
            fuel_2016 = weighted_percent(temp, fuel, 2016)
            title = region + ' {:.1f}% to {:.1f}% {}'.format(fuel_2005, 
                                                             fuel_2016, fuel)

            figs[region] = figure(title=title, tools=[hover], y_range=y_range, x_range=x_range,
                                 output_backend="webgl")
            figs[region].circle('x', 'y', source=source, color='colors', size=12, alpha=0.5)
            figs[region].xaxis.axis_label = x_axis
            figs[region].yaxis.axis_label = y_axis

    plots = figs.values()
    grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)

    show(grid)

In [ ]:

figs = {}

for region in percent_gen_df['region'].unique():
    temp = percent_gen_df.loc[percent_gen_df['region'] == region]
    pivoted = pd.pivot_table(temp, index='datetime', columns='fuel')
    pivoted.reset_index(inplace=True, drop=False)

    source = ColumnDataSource(data=dict(
                                        x=pivoted['datetime'],
                                        y=pivoted['percent generation'],
                                        datetime=pivoted['datetime']
                                        ))

#     p = TimeSeries(pivoted, tools='hover')
    hover = HoverTool(tooltips=[
                            ("fuel", "@fuel"),
                            ('% gen', '$y'),
                            ("datetime", "@datetime{%F}")],
                     formatters={
                             'datetime': 'datetime', # use 'datetime' formatter
                                })
    hover.point_policy = "snap_to_data"
    hover.mode = 'vline'
    figs[region] = figure(title=region, tools=[hover],
                         output_backend="webgl")
    figs[region].multi_line('x', 'y', source=source)
    figs[region].xaxis.axis_label = 'datetime'
    figs[region].yaxis.axis_label = 'percent generation'

plots = figs.values()
grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)

show(grid)

Variability scatter plots¶

Why is variability important?¶

Temporal variability is a sign that decarbonization is happening, but unevenly across the year. If the variability is not correlated between regions, better transmission would allow for a more even spread of low-carbon electricity. As the percent of those technologies grows, each region could export excess low(er)-carbon power. But if they are negatively correlated, then each region is going to be generating higher-CO2 power at the same time. New technologies will be necessary to deal with this.

Ines: Is there a point where variability becomes of return?

This is seasonal (monthly data). Grid reliability reports might be more at the hourly level.
Not even a variability issue. It's just a temporal correlation of Index.

Paper results subsections (order could change - method will be in the same order, short paragraphs that explain each):

Index
Generagion by region
State-level
Correlation across regions
Variability

Plot ideas (figures 3, 4, and 5):

Possibly emissions per capita or GDP (could be at state level) - ranking by state? Index, change in index, index per capita (See Sam's figure 2 - bar plots next to each other )
Coal on X, NG (or fuel) on Y, variability coded by color (or 3 variable triangle plot?)
Small multiples of variability vs fuel for a single region

For results:

Think of 1 figure or table for each subsection
Write 3 sentence punchline about each

Coal¶

This set of plots - and the ones repeated below - show:

The percent generation from coal in a given month against the percent change in normalized variability from a 2005 annual average
The percent change in coal generation from 2005 annual average against the percent change in normalized variability from a 2005 annual average

Analysis:¶

The cleanest relationship that I see is that as coal goes down, variability goes up. The trend is pretty linear, and extends across a bunch of different NERC regions. A few interesting notes:

SPP sees a steep decline in coal use from ~80% to 20-30%, but the variability doesn't go up until percent coal drops below 40%
In contrast, RFC sees an immediate linear increase in variability from a similar starting point
FRCC doesn't see much of an increase in variability despite the drop in coal from ~40% to 10-20%
The other NERC regions (NPCC, ERCOT, WECC, SERC, RFC, and MRO) all have similar linear responses to the drop in coal use even though they have different starting points for percent of coal

Percent Coal vs Change in norm variability¶

In [24]:

region_bokeh_plot(gen_index, 'Coal', 'percent Coal', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.85))

Change in Coal vs Change in norm variability¶

In [25]:

region_bokeh_plot(gen_index, 'Coal', 'change in Coal', 
                  'change in norm variability', y_range=(-1, 7))

Wind¶

Percent Wind vs Change in norm variability¶

In [26]:

region_bokeh_plot(gen_index, 'Wind', 'percent Wind', 
                  'change in norm variability', 
                  y_range=(-1, 7), x_range=(0, 0.35))

Change in Wind vs Change in norm variability¶

In [27]:

region_bokeh_plot(gen_index, 'Wind', 'change in Wind', 
                  'change in norm variability', y_range=(-1, 7))

Natural gas¶

Percent Natural Gas vs Change in norm variability¶

In [28]:

region_bokeh_plot(gen_index, 'Natural Gas', 'percent Natural Gas', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.75))

Change in Natural Gas vs Change in norm variability¶

In [29]:

region_bokeh_plot(gen_index, 'Natural Gas', 'change in Natural Gas', 
                  'change in norm variability', y_range=(-1, 7))

Hydro¶

Percent Hydro vs Change in norm variability¶

In [30]:

region_bokeh_plot(gen_index, 'Hydro', 'percent Hydro', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.45))

Change in Hydro vs Change in norm variability¶

In [31]:

region_bokeh_plot(gen_index, 'Hydro', 'change in Hydro', 
                  'change in norm variability', y_range=(-1, 7))

Solar¶

Percent Solar vs Change in norm variability¶

In [32]:

region_bokeh_plot(gen_index, 'Solar', 'percent Solar', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.08))

Change in Solar vs Change in norm variability¶

Most NERC regions didn't have recorded solar in 2005, so this plot doesn't make much sense

In [33]:

region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 
                  'change in norm variability', y_range=(-1, 7))

Nuclear¶

Percent Solar vs Change in norm variability¶

In [34]:

region_bokeh_plot(gen_index, 'Nuclear', 'percent Nuclear', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.35))

In [35]:

region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 
                  'change in norm variability', y_range=(-1, 7))

¶

In [235]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
                  col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Natural Gas', 
      'Normalized Index variability', alpha=0.5, s=100,
      vmin=vmin, vmax=vmax, cmap=cmap)

# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)

# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)

# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')

Out[235]:

<seaborn.axisgrid.FacetGrid at 0x150bab710>

Out[235]:

<matplotlib.text.Text at 0x151356a10>

In [172]:

order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
                  col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Wind', 
      'Normalized Index variability', alpha=0.5, s=100,
      vmin=vmin, vmax=vmax, cmap=cmap)

# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)

# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)

# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')

Out[172]:

<seaborn.axisgrid.FacetGrid at 0x13d980c90>

Out[172]:

<matplotlib.text.Text at 0x13aef74d0>

In [173]:

import ternary

In [175]:

points = gen_index

figure, tax = ternary.figure(scale=1)
tax.scatter()

In [227]:

color_col = 'Normalized Index variability'
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
gen_index['points'] = gen_index[['percent Coal', 'percent Natural Gas', 'percent Wind']].apply(tuple, axis=1)
def ternary_facet_scatter(points, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    scale = 1
    ax = plt.gca()
    figure, tax = ternary.figure(ax=ax, scale=scale)
    tax.boundary(linewidth = 1.0)
#     tax.gridlines(multiple=10,color="blue")
    tax.scatter(points, c=c, **kwargs)
    tax.left_axis_label("Percent Wind")
    tax.right_axis_label("Percent Natural Gas")
    tax.bottom_axis_label("Percent Coal")
    tax.gridlines(multiple=.25, color='k', linestyle='-')
    tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
    tax.clear_matplotlib_ticks()
#     plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index[color_col].min()
vmax = gen_index[color_col].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

with sns.axes_style('white', {'axes.linewidth': 0,
                             'axes.grid': False}):
    g = sns.FacetGrid(gen_index, col='region', col_wrap=3,
                      col_order=order, palette='viridis')
    g.map(ternary_facet_scatter, 'points',
          color_col, alpha=0.5, s=100, cmap=cmap,
          vmin=vmin, vmax=vmax)

    # Make space for the colorbar
    # g.fig.subplots_adjust(right=.92)

    # Define a new Axes where the colorbar will go
    # cax = g.fig.add_axes([.94, .25, .02, .6])
    cax = g.fig.add_axes([.82, .05, .02, .23])
    # Get a mappable object with the same colormap as the data
    points = plt.scatter([], [], c=[], cmap=cmap, vmin=vmin, vmax=vmax)

    # Draw the colorbar
    g.fig.colorbar(points, cax=cax).outline.set_visible(False)
    g.fig.text(.84, .3, 'Normalized Variability', ha='center')
    axes = g.axes.flatten()
    for ax, title in zip(axes, order):
        ax.set_title(title)
        ax.set_xlabel('')
        ax.set_ylabel('')

Out[227]:

<seaborn.axisgrid.FacetGrid at 0x14d450990>

Out[227]:

<matplotlib.text.Text at 0x1503e67d0>

Out[227]:

<matplotlib.text.Text at 0x14f7e8490>

Out[227]:

<matplotlib.text.Text at 0x14f5a7090>

Out[227]:

<matplotlib.text.Text at 0x14f791490>

Out[227]:

<matplotlib.text.Text at 0x14f728610>

Out[227]:

<matplotlib.text.Text at 0x14f6f6a50>

Out[227]:

<matplotlib.text.Text at 0x14f70d090>

Out[227]:

<matplotlib.text.Text at 0x14f77e710>

Out[227]:

<matplotlib.text.Text at 0x14f9fc850>

Out[227]:

<matplotlib.text.Text at 0x14f76de50>

Out[227]:

<matplotlib.text.Text at 0x14fa3d210>

Out[227]:

<matplotlib.text.Text at 0x14fa28450>

Out[227]:

<matplotlib.text.Text at 0x14fa3d310>

Out[227]:

<matplotlib.text.Text at 0x14fbbbb50>

Out[227]:

<matplotlib.text.Text at 0x14fb99690>

Out[227]:

<matplotlib.text.Text at 0x14fa28f90>

Out[227]:

<matplotlib.text.Text at 0x14fc2c390>

Out[227]:

<matplotlib.text.Text at 0x14fbffcd0>

Out[227]:

<matplotlib.text.Text at 0x14fc16b90>

Out[227]:

<matplotlib.text.Text at 0x14a1e7350>

Out[227]:

<matplotlib.text.Text at 0x14fc86ad0>

Out[227]:

<matplotlib.text.Text at 0x14fc9c990>

Out[227]:

<matplotlib.text.Text at 0x14eb1f7d0>

Out[227]:

<matplotlib.text.Text at 0x14ec62110>

Out[227]:

<matplotlib.text.Text at 0x14ea27290>

In [232]:

df = gen_index.loc[gen_index['region'] == 'FRCC']
points = df['points']
c = df['Normalized Index variability']
scale = 1
# ax = plt.gca()
figure, tax = ternary.figure(scale=scale)
tax.boundary(linewidth = 1.0)
#     tax.gridlines(multiple=10,color="blue")
tax.scatter(points, c=c)
tax.left_axis_label("Percent Wind")
tax.right_axis_label("Percent Natural Gas")
tax.bottom_axis_label("Percent Coal")
tax.gridlines(multiple=.25, color='k', linestyle='-')
tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
tax.clear_matplotlib_ticks()
tax.show()

Out[232]:

<matplotlib.axes._subplots.AxesSubplot at 0x150a3ad90>