#!/usr/bin/env python # coding: utf-8
# In[1]: from IPython.core.interactiveshell import InteractiveShell InteractiveShell.ast_node_interactivity = "last" # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import pandas as pd import seaborn as sns from Index_Calculations import facility_index_gen, index_and_generation import os import glob import numpy as np from statsmodels.tsa.tsatools import detrend # ## Group state-level total gen into approx NERC regions # These definitions are taken from Pétron (2008). # Check actual generation from facilities in 2015 against the state-aggregation method # In[2]: states_in_NERC = {'WECC': ['WA', 'OR', 'CA', 'NV', 'AZ', 'UT', 'ID', 'MT', 'WY', 'CO', 'NM'], 'MRO': ['ND', 'SD', 'NE', 'MN', 'IA', 'WI'], 'SPP': ['KS', 'OK'], 'ERCOT': ['TX'], 'SERC': ['MO', 'IL', 'AR', 'LA', 'MS', 'AL', 'GA', 'SC', 'NC', 'TN'], 'FRCC': ['FL'], 'RFC': ['MI', 'IN', 'OH', 'PA', 'WV', 'KY', 'VA', 'NJ', 'DE', 'MD'], 'NPCC': ['NY', 'VT', 'ME', 'NH', 'MA', 'RI', 'CT']} # In[24]: for nerc, states in states_in_NERC.iteritems(): paths = [os.path.join('Data storage', 'state gen data', state + ' fuels gen.csv') for state in states] nerc_list = [pd.read_csv(path) for path in paths] nerc_df = pd.concat(nerc_list) nerc_df.reset_index(inplace=True, drop=True) path_out = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv') nerc_df.to_csv(path_out, index=False) # ## Calculate index values for each NERC region # In[25]: facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip') epa_path = os.path.join('Data storage', 'Monthly EPA emissions 2017-05-25.csv') ef_path = os.path.join('Data storage', 'Final emission factors.csv') out_folder = os.path.join('Data storage', 'final NERC data from states') NERC_path = os.path.join('Data storage', 'Facility NERC labels.csv') for nerc in states_in_NERC.keys(): all_fuel_path = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv') index_and_generation(facility_path=facility_path, all_fuel_path = all_fuel_path, epa_path=epa_path, emission_factor_path=ef_path, export_folder=out_folder, export_path_ext=' ' + nerc, state=states_in_NERC[nerc]) # ## Read data and create figures # In[3]: path = os.path.join('Data storage', 'final NERC data from states', 'Monthly index*') mi_fns = glob.glob(path) path = os.path.join('Data storage', 'final NERC data from states', 'Monthly gen*') mg_fns = glob.glob(path) # In[4]: df_list = [] for f in mi_fns: region = f.split()[-1][:-4] df = pd.read_csv(f) df['region'] = region df_list.append(df) full_mi = pd.concat(df_list) full_mi.reset_index(inplace=True, drop=True) full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True) full_mi['datetime'] = pd.to_datetime(full_mi['datetime']) # In[5]: df_list = [] for f in mg_fns: region = f.split()[-1][:-4] df = pd.read_csv(f) df['region'] = region df_list.append(df) full_mg = pd.concat(df_list) full_mg.reset_index(inplace=True, drop=True) full_mg['datetime'] = pd.to_datetime(full_mg['datetime']) monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'], values='generation (MWh)', columns='fuel category 1') monthly_gen.reset_index(inplace=True, drop=False) monthly_gen['Year'] = monthly_gen['datetime'].dt.year monthly_gen.replace(np.nan, 0, inplace=True) # In[6]: gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'region', 'monthly index (g/kWh)']], on=['datetime', 'region']) # In[7]: gen_index.head() # In[7]: for region in gen_index['region'].unique(): gen_index.loc[gen_index['region'] == region, 'Index variability'] = \ gen_index.loc[gen_index['region']==region, 'monthly index (g/kWh)'].rolling(window=12).std() gen_index.loc[gen_index['region'] == region, 'Normalized Index variability'] = \ gen_index.loc[gen_index['region']==region, 'Index variability'] / \ gen_index.loc[gen_index['region']==region, 'monthly index (g/kWh)'].rolling(window=12).mean() # In[8]: base_year = 2005 # In[9]: fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other', 'Solar', 'Wind', 'Hydro'] # fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other'] gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1) for fuel in fuels: # New columns that are being added col_percent = 'percent ' + fuel col_change = 'change in ' + fuel # Calculate percent of generation from each fuel type gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen'] # Percent of fuel in region in base year (entire year) for region in gen_index['region'].unique(): percent_fuel_base = gen_index.loc[(gen_index['Year'] == base_year) & (gen_index['region'] == region), fuel].sum() / gen_index.loc[(gen_index['Year'] == base_year) & (gen_index['region'] == region), 'Total gen'].sum() # Use percent of fuel in base year to calculate change for each region/month gen_index.loc[gen_index['region'] == region, col_change] = (gen_index.loc[gen_index['region'] == region, col_percent] - percent_fuel_base) / percent_fuel_base # Change in variability compared to average base year value for region in gen_index['region'].unique(): norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) & (gen_index['region'] == region), 'Normalized Index variability'].mean() variability_base = gen_index.loc[(gen_index['Year'] == base_year) & (gen_index['region'] == region), 'Index variability'].mean() gen_index.loc[gen_index['region'] == region, 'change in variability'] = (gen_index.loc[gen_index['region'] == region, 'Index variability'] - variability_base) / variability_base gen_index.loc[gen_index['region'] == region, 'change in norm variability'] = (gen_index.loc[gen_index['region'] == region, 'Normalized Index variability'] - norm_variability_base) / norm_variability_base # In[18]: from bokeh.plotting import figure, show, output_notebook, ColumnDataSource from bokeh.plotting import * from bokeh.layouts import gridplot, column from bokeh.models import HoverTool, Div from bokeh.palettes import viridis from bokeh.charts import TimeSeries from bokeh.io import export_svgs # In[19]: output_notebook() # In[20]: def weighted_percent(df, fuel, year): all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables', 'Wind', 'Solar', 'Hydro'] temp = df.loc[df['Year'] == year, all_fuels] temp['Total'] = temp.sum(axis=1) weighted_per = temp[fuel].sum() / temp['Total'].sum() * 100 return weighted_per # In[13]: gen_index.columns # ### Actual and Normalized variability over time in each NERC region # # At the national level, Index variability went up after 2007, and then dipped back down briefly in 2013. This trend can be seen in a few, but not all, of the regions. # # I'm not sure what the cyclical trends in Index variability represent. These values are 12 month rolling standard deviations - they shouldn't be affected by seasonal variation. # **Update:** Upticks in the variability or normalized variability could be due to one or more months where the Index value jumped up or down well outside the normal range. This would increase the rolling standard deviation for 12 months. # # The normalized variability here is a 12 month rolling standard deviation of CO2 intensity divided by the 12 month rolling average. I'm suspicious of the results in MRO. # In[12]: sns.set_style('white', {'axes.linewidth': 1.5, 'axes.grid': True}) sns.set_context('notebook', font_scale=1.2) # In[13]: def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None, suptitle='', add_legend=False, ax_labels=None, FG_kwargs={}, plot_kwargs={}): g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs) g.map(plot_function, x_axis, y_axis, **plot_kwargs) g.set_xticklabels(rotation=35) if add_legend: g.add_legend() if suptitle: plt.suptitle(suptitle, y=1.02, size=15) if col_order and 'col' in FG_kwargs: axes = g.axes.flatten() for ax, title in zip(axes, order): ax.set_title(title) if ax_labels: g.set_axis_labels(ax_labels) # In[278]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(col='region', hue='region', col_wrap=3, aspect=1.2, hue_order=order) region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime', y_axis='Index variability', col_order=order, suptitle='Index Variability', FG_kwargs=FG_kwargs) # In[273]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(col='region', hue='region', col_wrap=3, aspect=1.2, hue_order=order) region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime', y_axis='Normalized Index variability', col_order=order, suptitle='Normalized Index Variability', FG_kwargs=FG_kwargs) # ### Actual index values over time in each NERC region # **Figure 1 of paper - also include national** # In[275]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(col='region', hue='region', col_wrap=3, aspect=1.2, hue_order=order, ylim=(0, 1050)) region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime', y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index', FG_kwargs=FG_kwargs) # In[280]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(0, 1050)) region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index', FG_kwargs=FG_kwargs) # #### Example of Index correlation between two regions, with and without detrending # Trying to show why detrending is needed when using the correlation to determine seasonal differences between regions. # In[38]: order = ['FRCC', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(200, 1050)) corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # In[43]: order = ['ERCOT', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(200, 1050)) corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # ### Correlation of detrended data # Trying both a linear detrend and subtracting the previous month's value. # # I'm not sure if I like the method of subtracting the previous month's value. Need to see if it is retaining the seasonal behavior. # In[41]: order = ['FRCC', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(-300, 300)) detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy() for col in order: detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)']) corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # In[10]: def shift_detrend(series, n): 'Shift a series by n periods to detrend' detrended = series - series.shift(n) return detrended # In[14]: order = ['FRCC', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(-300, 300)) detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy() for col in order: detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'], 12) corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # In[44]: order = ['ERCOT', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(-300, 300)) detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy() for col in order: detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)']) corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # In[15]: order = ['ERCOT', 'SPP'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order, ylim=(-300, 300)) detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy() for col in order: detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'], 12) corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs) plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef)) # Plot all regions detrended # In[16]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(hue='region', col='region', col_wrap=3, aspect=1.2, hue_order=order, ylim=(-300, 300)) detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy() for col in order: detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)']) corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region').corr() corr_coef = corr_coef.iloc[1, 0] region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order, add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs) # ### Correlation in Index across regions over time # Because each region is decreasing over time, need to detrend the data before finding the correlation matrix. Using the statsmodel detrend function, with an assumption that the trends are linear (order 1). # In[303]: nerc_index = gen_index.pivot_table(values='monthly index (g/kWh)', index='datetime', columns='region') nerc_index_detrend = pd.DataFrame(index=nerc_index.index) for col in nerc_index.columns: nerc_index_detrend.loc[:, col] = detrend(nerc_index.loc[:, col]) # In[299]: sns.heatmap(nerc_index_detrend.corr(), square=True, cmap='magma_r') # - Are correlation patterns different in summer vs winter? # - Does correlation change over time? # ### Percent of national generation from each NERC region # In[31]: nerc_gen = gen_index.pivot(index='datetime', columns='region', values='Total gen') regions = nerc_gen.columns nerc_gen['Total'] = nerc_gen.sum(axis=1) percent_nerc_gen = nerc_gen.iloc[:, :-1].divide(nerc_gen.iloc[:, -1], axis=0) # In[32]: percent_nerc_gen.reset_index(inplace=True) # In[33]: percent_nerc_gen = pd.melt(percent_nerc_gen, id_vars='datetime', value_name='Percent Generation') # In[281]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] FG_kwargs = dict(hue='region', aspect=1.5, size=5, hue_order=order) region_facet_grid(df=percent_nerc_gen, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='Percent Generation', suptitle='Percent generation from each NERC region', FG_kwargs=FG_kwargs) # ### Generation in each NERC region in 2015 # In[53]: nerc_gen['Year'] = nerc_gen.index.year nerc_gen.groupby('Year').sum() # ### Percent of each fuel used in each NERC region # In[107]: all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables', 'Wind', 'Solar', 'Hydro'] value_cols = ['percent {}'.format(fuel) for fuel in all_fuels] percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'], value_vars=value_cols, value_name='percent generation', var_name='fuel') percent_gen_df['fuel'] = percent_gen_df['fuel'].map(lambda x: x.split()[-1]) percent_gen_df['fuel'].replace('Renewables', 'Other Renewables', inplace=True) percent_gen_df['fuel'].replace('Gas', 'Natural Gas', inplace=True) # In[38]: percent_gen_df.head() # ** This could be figure 2?** # In[108]: percent_gen_df['fuel'].unique() # In[286]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar', 'Other', 'Other Renewables'] FG_kwargs = dict(hue='fuel', col='region', col_wrap=3, aspect=1.2, hue_order=fuel_order) region_facet_grid(df=percent_gen_df, plot_function=plt.plot, x_axis='datetime', add_legend=True, y_axis='percent generation', col_order=order, suptitle='', FG_kwargs=FG_kwargs) # In[23]: def region_bokeh_plot(df, fuel, x_axis, y_axis, x_range=None, y_range=None, x_lower_lim=0, x_upper_lim=None): figs = {} if not x_upper_lim: x_upper_lim = df[x_axis].max() for region in df['region'].unique(): temp = gen_index.loc[gen_index['region'] == region] max_change = temp[x_axis].max() if max_change <= x_upper_lim and max_change >= x_lower_lim: source = ColumnDataSource(data=dict( x=temp[x_axis], y=temp[y_axis], datetime=temp['datetime'], colors= viridis(len(temp)) )) hover = HoverTool(tooltips=[ ("(x,y)", "($x, $y)"), ("datetime", "@datetime{%F}")], formatters={ 'datetime': 'datetime', # use 'datetime' formatter }) hover.point_policy = "snap_to_data" fuel_2005 = weighted_percent(temp, fuel, 2005) fuel_2016 = weighted_percent(temp, fuel, 2016) title = region + ' {:.1f}% to {:.1f}% {}'.format(fuel_2005, fuel_2016, fuel) figs[region] = figure(title=title, tools=[hover], y_range=y_range, x_range=x_range, output_backend="webgl") figs[region].circle('x', 'y', source=source, color='colors', size=12, alpha=0.5) figs[region].xaxis.axis_label = x_axis figs[region].yaxis.axis_label = y_axis plots = figs.values() grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240) show(grid) # In[ ]: figs = {} for region in percent_gen_df['region'].unique(): temp = percent_gen_df.loc[percent_gen_df['region'] == region] pivoted = pd.pivot_table(temp, index='datetime', columns='fuel') pivoted.reset_index(inplace=True, drop=False) source = ColumnDataSource(data=dict( x=pivoted['datetime'], y=pivoted['percent generation'], datetime=pivoted['datetime'] )) # p = TimeSeries(pivoted, tools='hover') hover = HoverTool(tooltips=[ ("fuel", "@fuel"), ('% gen', '$y'), ("datetime", "@datetime{%F}")], formatters={ 'datetime': 'datetime', # use 'datetime' formatter }) hover.point_policy = "snap_to_data" hover.mode = 'vline' figs[region] = figure(title=region, tools=[hover], output_backend="webgl") figs[region].multi_line('x', 'y', source=source) figs[region].xaxis.axis_label = 'datetime' figs[region].yaxis.axis_label = 'percent generation' plots = figs.values() grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240) show(grid) # ## Variability scatter plots # ### Why is variability important? # # Temporal variability is a sign that decarbonization is happening, but unevenly across the year. If the variability is not correlated between regions, better transmission would allow for a more even spread of low-carbon electricity. As the percent of those technologies grows, each region could export excess low(er)-carbon power. But if they are negatively correlated, then each region is going to be generating higher-CO2 power at the same time. New technologies will be necessary to deal with this. # # **Ines**: Is there a point where variability becomes of return? # - This is seasonal (monthly data). Grid reliability reports might be more at the hourly level. # - Not even a variability issue. It's just a temporal correlation of Index. # **Paper results subsections** (order could change - method will be in the same order, short paragraphs that explain each): # - Index # - Generagion by region # - State-level # - Correlation across regions # - Variability # Plot ideas (figures 3, 4, and 5): # - Possibly emissions per capita or GDP (could be at state level) - ranking by state? Index, change in index, index per capita (See Sam's figure 2 - bar plots next to each other ) # - Coal on X, NG (or fuel) on Y, variability coded by color (or 3 variable triangle plot?) # - Small multiples of variability vs fuel for a single region # For results: # - Think of 1 figure or table for each subsection # - Write 3 sentence punchline about each # ### Coal # This set of plots - and the ones repeated below - show: # 1. The percent generation from coal in a given month against the percent change in normalized variability from a 2005 annual average # 2. The percent change in coal generation from 2005 annual average against the percent change in normalized variability from a 2005 annual average # #### Analysis: # The cleanest relationship that I see is that as coal goes down, variability goes up. The trend is pretty linear, and extends across a bunch of different NERC regions. A few interesting notes: # - SPP sees a steep decline in coal use from ~80% to 20-30%, but the variability doesn't go up until percent coal drops below 40% # - In contrast, RFC sees an immediate linear increase in variability from a similar starting point # - FRCC doesn't see much of an increase in variability despite the drop in coal from ~40% to 10-20% # - The other NERC regions (NPCC, ERCOT, WECC, SERC, RFC, and MRO) all have similar linear responses to the drop in coal use even though they have different starting points for percent of coal # #### Percent Coal vs Change in norm variability # In[24]: region_bokeh_plot(gen_index, 'Coal', 'percent Coal', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.85)) # #### Change in Coal vs Change in norm variability # In[25]: region_bokeh_plot(gen_index, 'Coal', 'change in Coal', 'change in norm variability', y_range=(-1, 7)) # ### Wind # #### Percent Wind vs Change in norm variability # In[26]: region_bokeh_plot(gen_index, 'Wind', 'percent Wind', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.35)) # #### Change in Wind vs Change in norm variability # In[27]: region_bokeh_plot(gen_index, 'Wind', 'change in Wind', 'change in norm variability', y_range=(-1, 7)) # ### Natural gas # #### Percent Natural Gas vs Change in norm variability # In[28]: region_bokeh_plot(gen_index, 'Natural Gas', 'percent Natural Gas', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.75)) # #### Change in Natural Gas vs Change in norm variability # In[29]: region_bokeh_plot(gen_index, 'Natural Gas', 'change in Natural Gas', 'change in norm variability', y_range=(-1, 7)) # ### Hydro # #### Percent Hydro vs Change in norm variability # In[30]: region_bokeh_plot(gen_index, 'Hydro', 'percent Hydro', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.45)) # #### Change in Hydro vs Change in norm variability # In[31]: region_bokeh_plot(gen_index, 'Hydro', 'change in Hydro', 'change in norm variability', y_range=(-1, 7)) # ### Solar # #### Percent Solar vs Change in norm variability # In[32]: region_bokeh_plot(gen_index, 'Solar', 'percent Solar', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.08)) # #### Change in Solar vs Change in norm variability # # Most NERC regions didn't have recorded solar in 2005, so this plot doesn't make much sense # In[33]: region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 'change in norm variability', y_range=(-1, 7)) # ### Nuclear # #### Percent Solar vs Change in norm variability # In[34]: region_bokeh_plot(gen_index, 'Nuclear', 'percent Nuclear', 'change in norm variability', y_range=(-1, 7), x_range=(0, 0.35)) # In[35]: region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 'change in norm variability', y_range=(-1, 7)) # ### # In[235]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] def facet_scatter(x, y, c, **kwargs): """Draw scatterplot with point colors from a faceted DataFrame columns.""" kwargs.pop("color") plt.scatter(x, y, c=c, **kwargs) vmin = gen_index['Normalized Index variability'].min() vmax = gen_index['Normalized Index variability'].max() cmap = plt.get_cmap('viridis') #sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True) g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2, col_order=order, palette='viridis') g.map(facet_scatter, 'percent Coal', 'percent Natural Gas', 'Normalized Index variability', alpha=0.5, s=100, vmin=vmin, vmax=vmax, cmap=cmap) # Make space for the colorbar # g.fig.subplots_adjust(right=.92) # Define a new Axes where the colorbar will go # cax = g.fig.add_axes([.94, .25, .02, .6]) cax = g.fig.add_axes([.82, .05, .02, .23]) # Get a mappable object with the same colormap as the data points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap) # Draw the colorbar g.fig.colorbar(points, cax=cax).outline.set_visible(False) g.fig.text(.84, .3, 'Normalized Variability', ha='center') # In[172]: order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] def facet_scatter(x, y, c, **kwargs): """Draw scatterplot with point colors from a faceted DataFrame columns.""" kwargs.pop("color") plt.scatter(x, y, c=c, **kwargs) vmin = gen_index['Normalized Index variability'].min() vmax = gen_index['Normalized Index variability'].max() cmap = plt.get_cmap('viridis') #sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True) g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2, col_order=order, palette='viridis') g.map(facet_scatter, 'percent Coal', 'percent Wind', 'Normalized Index variability', alpha=0.5, s=100, vmin=vmin, vmax=vmax, cmap=cmap) # Make space for the colorbar # g.fig.subplots_adjust(right=.92) # Define a new Axes where the colorbar will go # cax = g.fig.add_axes([.94, .25, .02, .6]) cax = g.fig.add_axes([.82, .05, .02, .23]) # Get a mappable object with the same colormap as the data points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap) # Draw the colorbar g.fig.colorbar(points, cax=cax).outline.set_visible(False) g.fig.text(.84, .3, 'Normalized Variability', ha='center') # In[173]: import ternary # In[175]: points = gen_index figure, tax = ternary.figure(scale=1) tax.scatter() # In[227]: color_col = 'Normalized Index variability' order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC', 'WECC', 'NPCC'] gen_index['points'] = gen_index[['percent Coal', 'percent Natural Gas', 'percent Wind']].apply(tuple, axis=1) def ternary_facet_scatter(points, c, **kwargs): """Draw scatterplot with point colors from a faceted DataFrame columns.""" kwargs.pop("color") scale = 1 ax = plt.gca() figure, tax = ternary.figure(ax=ax, scale=scale) tax.boundary(linewidth = 1.0) # tax.gridlines(multiple=10,color="blue") tax.scatter(points, c=c, **kwargs) tax.left_axis_label("Percent Wind") tax.right_axis_label("Percent Natural Gas") tax.bottom_axis_label("Percent Coal") tax.gridlines(multiple=.25, color='k', linestyle='-') tax.ticks(axis='lbr', multiple=0.25, linewidth=0) tax.clear_matplotlib_ticks() # plt.scatter(x, y, c=c, **kwargs) vmin = gen_index[color_col].min() vmax = gen_index[color_col].max() cmap = plt.get_cmap('viridis') #sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True) with sns.axes_style('white', {'axes.linewidth': 0, 'axes.grid': False}): g = sns.FacetGrid(gen_index, col='region', col_wrap=3, col_order=order, palette='viridis') g.map(ternary_facet_scatter, 'points', color_col, alpha=0.5, s=100, cmap=cmap, vmin=vmin, vmax=vmax) # Make space for the colorbar # g.fig.subplots_adjust(right=.92) # Define a new Axes where the colorbar will go # cax = g.fig.add_axes([.94, .25, .02, .6]) cax = g.fig.add_axes([.82, .05, .02, .23]) # Get a mappable object with the same colormap as the data points = plt.scatter([], [], c=[], cmap=cmap, vmin=vmin, vmax=vmax) # Draw the colorbar g.fig.colorbar(points, cax=cax).outline.set_visible(False) g.fig.text(.84, .3, 'Normalized Variability', ha='center') axes = g.axes.flatten() for ax, title in zip(axes, order): ax.set_title(title) ax.set_xlabel('') ax.set_ylabel('') # In[232]: df = gen_index.loc[gen_index['region'] == 'FRCC'] points = df['points'] c = df['Normalized Index variability'] scale = 1 # ax = plt.gca() figure, tax = ternary.figure(scale=scale) tax.boundary(linewidth = 1.0) # tax.gridlines(multiple=10,color="blue") tax.scatter(points, c=c) tax.left_axis_label("Percent Wind") tax.right_axis_label("Percent Natural Gas") tax.bottom_axis_label("Percent Coal") tax.gridlines(multiple=.25, color='k', linestyle='-') tax.ticks(axis='lbr', multiple=0.25, linewidth=0) tax.clear_matplotlib_ticks() tax.show()