%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
from geopandas import GeoDataFrame
sns.set(style='whitegrid')
path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
facility_df = pd.read_csv(path, parse_dates=['datetime'])
facility_df.head()
f | fuel | month | plant id | total fuel (mmbtu) | year | generation (MWh) | elec fuel (mmbtu) | geography | last_updated | lat | lon | prime mover | datetime | quarter | all fuel fossil CO2 (kg) | elec fuel fossil CO2 (kg) | all fuel total CO2 (kg) | elec fuel total CO2 (kg) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | M | NG | 3 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-03-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | M | NG | 2 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-02-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | M | NG | 1 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-01-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | M | NG | 12 | 10275 | 0.0 | 2016 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2016-12-01 | 4 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | M | NG | 11 | 10275 | 0.0 | 2016 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2016-11-01 | 4 | 0.0 | 0.0 | 0.0 | 0.0 |
facility_df.dropna(inplace=True, subset=['lat', 'lon'])
cols = ['lat', 'lon', 'plant id', 'year']
small_facility = facility_df.loc[:, cols].drop_duplicates()
geometry = [Point(xy) for xy in zip(small_facility.lon, small_facility.lat)]
# small_facility = small_facility.drop(['lon', 'lat'], axis=1)
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(small_facility, crs=crs, geometry=geometry)
geo_df
¶path = os.path.join('Data storage', 'NERC_Regions_EIA', 'NercRegions_201610.shp')
regions = gpd.read_file(path)
facility_nerc = gpd.sjoin(geo_df, regions, how='inner', op='within')
facility_nerc.head()
lat | lon | plant id | year | geometry | index_right | NERC | NERC_Label | |
---|---|---|---|---|---|---|---|---|
0 | 27.9114 | -81.6006 | 10275 | 2017 | POINT (-81.6006 27.9114) | 1 | FRCC | Florida Reliability Coordinating Council (FRCC) |
3 | 27.9114 | -81.6006 | 10275 | 2016 | POINT (-81.6006 27.9114) | 1 | FRCC | Florida Reliability Coordinating Council (FRCC) |
15 | 27.9114 | -81.6006 | 10275 | 2015 | POINT (-81.6006 27.9114) | 1 | FRCC | Florida Reliability Coordinating Council (FRCC) |
27 | 27.9114 | -81.6006 | 10275 | 2014 | POINT (-81.6006 27.9114) | 1 | FRCC | Florida Reliability Coordinating Council (FRCC) |
39 | 27.9114 | -81.6006 | 10275 | 2013 | POINT (-81.6006 27.9114) | 1 | FRCC | Florida Reliability Coordinating Council (FRCC) |
cols = ['plant id', 'year', 'NERC']
facility_df = facility_df.merge(facility_nerc.loc[:, cols],
on=['plant id', 'year'], how='left')
facility_df.head()
f | fuel | month | plant id | total fuel (mmbtu) | year | generation (MWh) | elec fuel (mmbtu) | geography | last_updated | lat | lon | prime mover | datetime | quarter | all fuel fossil CO2 (kg) | elec fuel fossil CO2 (kg) | all fuel total CO2 (kg) | elec fuel total CO2 (kg) | NERC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | M | NG | 3 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-03-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | FRCC |
1 | M | NG | 2 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-02-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | FRCC |
2 | M | NG | 1 | 10275 | 0.0 | 2017 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2017-01-01 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | FRCC |
3 | M | NG | 12 | 10275 | 0.0 | 2016 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2016-12-01 | 4 | 0.0 | 0.0 | 0.0 | 0.0 | FRCC |
4 | M | NG | 11 | 10275 | 0.0 | 2016 | 0.0 | 0.0 | USA-FL | 2017-05-24T14:26:30-04:00 | 27.9114 | -81.6006 | ALL | 2016-11-01 | 4 | 0.0 | 0.0 | 0.0 | 0.0 | FRCC |
Filter out data older than 2014 to reduce size
facility_df['state'] = facility_df['geography'].str[-2:]
keep_cols = ['fuel', 'year', 'month', 'datetime', 'state', 'plant id', 'NERC',
'generation (MWh)', 'total fuel (mmbtu)', 'elec fuel (mmbtu)']
facility_df = facility_df.loc[facility_df['year'] >= 2014, keep_cols]
facility_fuel_cats = {'COW': ['SUB', 'BIT', 'LIG', 'WC', 'SC', 'RC', 'SGC'],
'NG': ['NG'],
'PEL': ['DFO', 'RFO', 'KER', 'JF',
'PG', 'WO', 'SGP'],
'PC': ['PC'],
'HYC': ['WAT'],
'HPS': [],
'GEO': ['GEO'],
'NUC': ['NUC'],
'OOG': ['BFG', 'OG', 'LFG'],
'OTH': ['OTH', 'MSN', 'MSW', 'PUR', 'TDF', 'WH'],
'SUN': ['SUN'],
'DPV': [],
'WAS': ['OBL', 'OBS', 'OBG', 'MSB', 'SLW'],
'WND': ['WND'],
'WWW': ['WDL', 'WDS', 'AB', 'BLQ']
}
for category in facility_fuel_cats.keys():
fuels = facility_fuel_cats[category]
facility_df.loc[facility_df['fuel'].isin(fuels),
'fuel category'] = category
facility_df.head()
fuel | year | month | datetime | state | plant id | NERC | generation (MWh) | total fuel (mmbtu) | elec fuel (mmbtu) | fuel category | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | NG | 2017 | 3 | 2017-03-01 | FL | 10275 | FRCC | 0.0 | 0.0 | 0.0 | NG |
1 | NG | 2017 | 2 | 2017-02-01 | FL | 10275 | FRCC | 0.0 | 0.0 | 0.0 | NG |
2 | NG | 2017 | 1 | 2017-01-01 | FL | 10275 | FRCC | 0.0 | 0.0 | 0.0 | NG |
3 | NG | 2016 | 12 | 2016-12-01 | FL | 10275 | FRCC | 0.0 | 0.0 | 0.0 | NG |
4 | NG | 2016 | 11 | 2016-11-01 | FL | 10275 | FRCC | 0.0 | 0.0 | 0.0 | NG |
facility_df.dtypes
fuel object year int64 month int64 datetime datetime64[ns] state object plant id int64 NERC object generation (MWh) float64 total fuel (mmbtu) float64 elec fuel (mmbtu) float64 fuel category object dtype: object
facility_df.loc[facility_df['NERC'].isnull(), 'state'].unique()
array(['HI', 'FL', 'VA', 'MI', 'ME', 'MN', 'CA', 'AK', 'NY', 'MD', 'WI', 'NH', 'PA', 'OR', 'MA', 'IL', 'DC', 'RI', 'TX', 'CT', 'WA'], dtype=object)
folder = os.path.join('Data storage', 'Derived data', 'state gen data')
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE",
"FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS",
"KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS",
"MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY",
"NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
"SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
state_list = []
for state in states:
path = os.path.join(folder, '{} fuels gen.csv'.format(state))
df = pd.read_csv(path, parse_dates=['datetime'])
state_list.append(df)
state_df = pd.concat(state_list)
state_df.reset_index(inplace=True, drop=True)
state_df.dtypes
end int64 f object geography object last_updated object sector int64 series_id object start int64 type object units object year int64 month int64 generation (MWh) float64 datetime datetime64[ns] quarter int64 total fuel (mmbtu) float64 elec fuel (mmbtu) float64 all fuel CO2 (kg) float64 elec fuel CO2 (kg) float64 dtype: object
state_df['state'] = state_df['geography'].str[-2:]
keep_cols = ['state', 'type', 'year', 'datetime', 'generation (MWh)',
'elec fuel (mmbtu)']
fuel_cats = facility_fuel_cats.keys()
state_df = state_df.loc[(state_df['year'] >= 2014) &
(state_df['type'].isin(fuel_cats)), keep_cols]
state_df['type'].unique()
array(['COW', 'HYC', 'NUC', 'NG', 'PEL', 'DPV', 'OTH', 'OOG', 'WWW', 'SUN', 'WAS', 'WND', 'HPS', 'PC', 'GEO'], dtype=object)
annual_facility = facility_df.groupby(['year', 'state', 'fuel category']).sum()
# annual_facility.reset_index(inplace=True)
annual_facility.drop('plant id', axis=1, inplace=True)
annual_facility.head()
generation (MWh) | elec fuel (mmbtu) | |||
---|---|---|---|---|
year | state | fuel category | ||
2014 | AK | COW | 558292.181 | 7216953.0 |
HYC | 1538738.000 | 14633403.0 | ||
NG | 3288022.319 | 32828304.0 | ||
OOG | 56165.769 | 546450.0 | ||
PEL | 445621.447 | 6927101.0 |
annual_state = state_df.groupby(['year', 'state', 'type']).sum()
# annual_state.reset_index(inplace=True)
annual_state.head(n=25)
generation (MWh) | elec fuel (mmbtu) | |||
---|---|---|---|---|
year | state | type | ||
2014 | AK | COW | 558292.17 | 7216950.0 |
HYC | 1538738.00 | NaN | ||
NG | 3288022.33 | 32828310.0 | ||
OOG | NaN | NaN | ||
OTH | -2312.99 | NaN | ||
PEL | 445621.46 | 6927090.0 | ||
WAS | 62511.68 | NaN | ||
WND | 151957.00 | NaN | ||
WWW | 0.00 | NaN | ||
AL | COW | 47301626.28 | 488993810.0 | |
DPV | 3101.38 | NaN | ||
HYC | 9466872.01 | NaN | ||
NG | 48270074.40 | 362215370.0 | ||
NUC | 41243689.00 | NaN | ||
OOG | 180403.48 | NaN | ||
OTH | 140.51 | NaN | ||
PEL | 98100.01 | 1199180.0 | ||
WAS | 46936.84 | NaN | ||
WWW | 2732084.23 | NaN | ||
AR | COW | 33220754.79 | 334098580.0 | |
DPV | 4853.48 | NaN | ||
HPS | 67070.00 | NaN | ||
HYC | 2639776.01 | NaN | ||
NG | 9613708.03 | 70429870.0 | ||
NUC | 14478259.00 | NaN |
It's interesting that the facility data has fuel consumption for solar generation and the state data doesn't. Looking at a 923 data file, it's clear that the fuel consumption is just based on a conversion efficiency of 36.6% across all facilities.
annual_state.loc[2016, 'CA', 'SUN']
generation (MWh) 19030396.62 elec fuel (mmbtu) NaN Name: (2016, CA, SUN), dtype: float64
annual_facility.loc[2016, 'CA', 'SUN']
generation (MWh) 14354970.0 elec fuel (mmbtu) 133773953.0 Name: (2016, CA, SUN), dtype: float64
How much generation from large sources (Hydro, wind, coal, natural gas, and nuclear) is missed by monthly 923 data?
for fuel in ['HYC', 'WND', 'COW', 'NG', 'NUC', 'SUN']:
state_total = annual_state.loc[2016, :, fuel]['generation (MWh)'].sum()
facility_total = annual_facility.loc[2016, :, fuel]['generation (MWh)'].sum()
error = (state_total - facility_total) / state_total
print('{} has an error of {:.2f}%'.format(fuel, error * 100))
HYC has an error of 24.60% WND has an error of 3.35% COW has an error of 1.15% NG has an error of 5.21% NUC has an error of 0.00% SUN has an error of 37.42%
The goal here is to figure out how much of generation and fuel consumption from facilities that only report annually is in each NERC region (by state)
path = os.path.join('Data storage', 'EIA923_Schedules_2_3_4_5_M_12_2015_Final.xlsx')
frequency = pd.read_excel(path, sheetname='Page 6 Plant Frame', header=4)
frequency.head()
YEAR | Plant Id | Plant State | Sector Number | NAICS Code | Plant Name | Combined Heat And Power Status | Reporting Frequency | |
---|---|---|---|---|---|---|---|---|
0 | 2015 | 2 | AL | 1 | 22 | Bankhead Dam | N | A |
1 | 2015 | 3 | AL | 1 | 22 | Barry | N | M |
2 | 2015 | 4 | AL | 1 | 22 | Walter Bouldin Dam | N | M |
3 | 2015 | 7 | AL | 1 | 22 | Gadsden | Y | A |
4 | 2015 | 8 | AL | 1 | 22 | Gorgas | N | M |
frequency.rename(columns={'Plant Id': 'plant id',
'Plant State': 'state',
'YEAR': 'year',
'Reporting\nFrequency': 'Reporting Frequency'}, inplace=True)
frequency.head()
year | plant id | state | Sector Number | NAICS Code | Plant Name | Combined Heat And Power Status | Reporting Frequency | |
---|---|---|---|---|---|---|---|---|
0 | 2015 | 2 | AL | 1 | 22 | Bankhead Dam | N | A |
1 | 2015 | 3 | AL | 1 | 22 | Barry | N | M |
2 | 2015 | 4 | AL | 1 | 22 | Walter Bouldin Dam | N | M |
3 | 2015 | 7 | AL | 1 | 22 | Gadsden | Y | A |
4 | 2015 | 8 | AL | 1 | 22 | Gorgas | N | M |
frequency.dtypes
year int64 plant id int64 state object Sector Number int64 NAICS Code int64 Plant Name object Combined Heat And\nPower Status object Reporting Frequency object dtype: object
Make a dataframe with generation, fuel consumption, and reporting frequency of facilities in 2015
freq_cols = ['year', 'plant id', 'Reporting Frequency']
df = pd.merge(facility_df, frequency.loc[:, freq_cols], on=['year', 'plant id'])
df.head()
fuel | year | month | datetime | state | plant id | NERC | generation (MWh) | total fuel (mmbtu) | elec fuel (mmbtu) | fuel category | Reporting Frequency | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NG | 2015 | 12 | 2015-12-01 | FL | 10275 | FRCC | 4344.509 | 55210.0 | 22133.0 | NG | A |
1 | NG | 2015 | 11 | 2015-11-01 | FL | 10275 | FRCC | 4304.052 | 54695.0 | 21927.0 | NG | A |
2 | NG | 2015 | 10 | 2015-10-01 | FL | 10275 | FRCC | 4810.546 | 61133.0 | 24507.0 | NG | A |
3 | NG | 2015 | 9 | 2015-09-01 | FL | 10275 | FRCC | 5058.453 | 64282.0 | 25770.0 | NG | A |
4 | NG | 2015 | 8 | 2015-08-01 | FL | 10275 | FRCC | 5404.571 | 68680.0 | 27533.0 | NG | A |
g = sns.factorplot(x='fuel category', y='generation (MWh)', hue='Reporting Frequency',
col='NERC', col_wrap=3, data=df, estimator=np.sum, ci=0, kind='bar',
palette='tab10')
g.set_xticklabels(rotation=30)
<seaborn.axisgrid.FacetGrid at 0x16fb2e198>
Number of NERC regions in a state
df.loc[df['state'] == 'TX', 'NERC'].nunique()
3
This is development of a method that will be used to approximate the fraction of EIA-estimated generation and consumption within each state that gets apportioned to each NERC regions (when there is more than one). The idea is to take data from the most recent "final" EIA-923 and use the annual reporting facilities to approximate the divisions for more recent data. I still need to figure out if it's better to do the calculation by month within a year or just for the year as a whole.
Determining if it's better to do month-by-month vs a single value for the whole year will depend on if the share of generation/consumption from Annual reporting facilities in each NERC changes much over the course of the year. There is the potential for error either way, and maybe even differences by state. Annual is certainly simpler.
While looking at data for Texas, I've discovered that generation from Annual reporting facilities can be negative. Need to figure out how (if?) to deal with this...
While there can be variation of % generation in each NERC within a state over the course of 2015, most fuel categories across most states are quite stable. And when fuels do a have a wide spread over the year, they also tend to not be a large fraction of total generation within the NERC region. Given these observations, I'm going to stick with a split calculated as the average over an entire year.
def annual(df, state):
"""Return the percent of gen & consumption by fuel type in each NERC region
for a state"""
a = df.loc[(df.state == state) &
(df['Reporting Frequency'] == 'A')].copy()
a.drop(['plant id', 'year'], axis=1, inplace=True)
a = a.groupby(['NERC', 'fuel category']).sum()
fuels = set(a.index.get_level_values('fuel category'))
temp_list = []
for fuel in fuels:
temp = (a.xs(fuel, level='fuel category')
/ a.xs(fuel, level='fuel category').sum())
temp['fuel category'] = fuel
temp_list.append(temp)
result = pd.concat(temp_list)
result.reset_index(inplace=True)
result['state'] = state
rename_cols = {'generation (MWh)': '% generation',
'total fuel (mmbtu)': '% total fuel',
'elec fuel (mmbtu)': '% elec fuel'}
result.rename(columns=rename_cols, inplace=True)
return result
def annual_month(df, state):
"""Return the percent of gen & consumption by fuel type and month in each
NERC region for a state"""
a = df.loc[(df.state == state) &
(df['Reporting Frequency'] == 'A')].copy()
a.drop(['plant id', 'year'], axis=1, inplace=True)
a = a.groupby(['NERC', 'fuel category', 'month']).sum()
fuels = set(a.index.get_level_values('fuel category'))
temp_list = []
for fuel in fuels:
for month in range(1, 13):
temp = (a.xs(fuel, level='fuel category')
.xs(month, level='month')
/ a.xs(fuel, level='fuel category')
.xs(month, level='month')
.sum())
temp['fuel category'] = fuel
temp['month'] = month
temp_list.append(temp)
result = pd.concat(temp_list)
result.reset_index(inplace=True)
result['state'] = state
rename_cols = {'generation (MWh)': '% generation',
'total fuel (mmbtu)': '% total fuel',
'elec fuel (mmbtu)': '% elec fuel'}
result.rename(columns=rename_cols, inplace=True)
return result
This is the percent of generation, total fuel consumption, and electric fuel consumption from facilities that report annually to EIA-923
df_list = []
for state in states:
num_nerc = df.loc[df.state == state, 'NERC'].nunique()
if num_nerc > 1:
df_list.append(annual(df, state))
df_list = []
for state in states:
num_nerc = df.loc[df.state == state, 'NERC'].nunique()
if num_nerc > 1:
df_list.append(annual_month(df, state))
fuel_by_nerc_month = pd.concat(df_list).reset_index(drop=True)
fuel_by_nerc = pd.concat(df_list).reset_index(drop=True)
fuel_by_nerc.head()
NERC | month | % generation | % total fuel | % elec fuel | fuel category | state | |
---|---|---|---|---|---|---|---|
0 | - | 0.333333 | 0.000066 | 0.034382 | 0.000065 | WWW | AR |
1 | SERC | 0.666667 | 0.999934 | 0.965618 | 0.999935 | WWW | AR |
2 | SERC | 1.000000 | NaN | NaN | NaN | PC | AR |
3 | - | 0.276596 | 0.002770 | 0.001223 | 0.002772 | NG | AR |
4 | SERC | 0.446809 | 0.219980 | 0.636033 | 0.175285 | NG | AR |
fuel_by_nerc_month.tail()
NERC | % generation | % total fuel | % elec fuel | fuel category | month | state | |
---|---|---|---|---|---|---|---|
2704 | RFC | 0.249526 | 0.382119 | 0.513470 | NG | 10 | WI |
2705 | MRO | 0.946647 | 0.666793 | 0.665857 | NG | 11 | WI |
2706 | RFC | 0.053353 | 0.333207 | 0.334143 | NG | 11 | WI |
2707 | MRO | 0.923901 | 0.658481 | 0.718664 | NG | 12 | WI |
2708 | RFC | 0.076099 | 0.341519 | 0.281336 | NG | 12 | WI |
st
split_states = []
for state in states:
if df.loc[df.state == state, 'NERC'].nunique() > 1:
split_states.append(state)
split_states
['AR', 'FL', 'IL', 'IA', 'KS', 'KY', 'LA', 'MI', 'MO', 'NE', 'NM', 'NC', 'OK', 'SD', 'TX', 'VA', 'WI']
cols = ['state', 'NERC', 'fuel category']
a = fuel_by_nerc_month.groupby(cols).std()
a.drop('month', axis=1, inplace=True)
a.xs('AR', level='state')
% generation | % total fuel | % elec fuel | ||
---|---|---|---|---|
NERC | fuel category | |||
- | HYC | 4.928033e-09 | 4.608873e-07 | 4.608873e-07 |
NG | 3.047858e-04 | 4.821993e-04 | 2.272056e-04 | |
PEL | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | |
SUN | NaN | NaN | NaN | |
WWW | 2.242653e-05 | 6.797735e-03 | 1.781255e-05 | |
SERC | HYC | 6.275202e-09 | 8.077144e-07 | 8.077144e-07 |
NG | 8.582103e-02 | 1.433441e-01 | 6.745018e-02 | |
OOG | 3.920610e-08 | 4.330886e-06 | 4.330886e-06 | |
PC | NaN | NaN | NaN | |
PEL | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | |
WAS | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | |
WWW | 2.242653e-05 | 6.797735e-03 | 1.781255e-05 | |
SPP | HYC | 5.370029e-09 | 6.464797e-07 | 6.464797e-07 |
NG | 8.551625e-02 | 1.428619e-01 | 6.722309e-02 | |
OOG | 3.920610e-08 | 4.330886e-06 | 4.330886e-06 |
a[a > .1].dropna(how='all')
% generation | % total fuel | % elec fuel | |||
---|---|---|---|---|---|
state | NERC | fuel category | |||
AR | SERC | NG | NaN | 0.143344 | NaN |
SPP | NG | NaN | 0.142862 | NaN | |
FL | FRCC | COW | 0.102601 | NaN | 0.134697 |
SERC | COW | 0.102601 | NaN | 0.134697 | |
IA | - | NG | 0.303138 | 0.264098 | 0.300394 |
MRO | COW | 0.132677 | NaN | 0.140241 | |
NG | 0.211916 | 0.213591 | 0.214703 | ||
SERC | COW | 0.153661 | NaN | 0.152410 | |
NG | 0.121395 | NaN | 0.113601 | ||
MI | MRO | PEL | 0.579756 | NaN | NaN |
RFC | PEL | 0.579756 | NaN | NaN | |
TX | SPP | WND | 0.214635 | 0.214635 | 0.214635 |
WWW | 5.272077 | NaN | 0.124454 | ||
TRE | WND | 0.214635 | 0.214635 | 0.214635 | |
WWW | 5.272077 | NaN | 0.124454 | ||
VA | RFC | COW | 0.169581 | NaN | 0.185219 |
NG | NaN | NaN | 0.116589 | ||
PEL | NaN | 0.115835 | NaN | ||
SERC | COW | 0.169581 | NaN | 0.185219 | |
NG | NaN | NaN | 0.116589 | ||
PEL | NaN | 0.115835 | NaN | ||
WI | MRO | NG | 0.159994 | NaN | 0.143071 |
PEL | 0.187468 | NaN | NaN | ||
RFC | NG | 0.159994 | NaN | 0.143071 | |
PEL | 0.187468 | NaN | NaN |
fuels = ['NG', 'HYC', 'COW', 'GEO', 'WND', 'SUN']
sns.factorplot(x='month', y='% generation', hue='fuel category', col='NERC',
row='state',
data=fuel_by_nerc_month.loc[(fuel_by_nerc_month['fuel category'].isin(fuels)) &
(fuel_by_nerc_month['NERC'] != '-')],
n_boot=1)
path = os.path.join('Figures', 'SI', 'Annual facility seasonal gen variation.pdf')
# plt.savefig(path, bbox_inches='tight')
fuel_by_nerc_month.loc[(fuel_by_nerc_month.state=='TX') &
(fuel_by_nerc_month['fuel category'] == 'WWW')]
NERC | % generation | % total fuel | % elec fuel | fuel category | month | state | |
---|---|---|---|---|---|---|---|
2265 | SPP | 4.484362 | 0.538867 | 0.562231 | WWW | 1 | TX |
2266 | TRE | -3.484362 | 0.461133 | 0.437769 | WWW | 1 | TX |
2267 | SPP | 4.132260 | 0.473945 | 0.564103 | WWW | 2 | TX |
2268 | TRE | -3.132260 | 0.526055 | 0.435897 | WWW | 2 | TX |
2269 | SPP | 5.280268 | 0.519683 | 0.550018 | WWW | 3 | TX |
2270 | TRE | -4.280268 | 0.480317 | 0.449982 | WWW | 3 | TX |
2271 | SPP | 2.961378 | 0.697669 | 0.725501 | WWW | 4 | TX |
2272 | TRE | -1.961378 | 0.302331 | 0.274499 | WWW | 4 | TX |
2273 | SPP | -5.065479 | 0.586754 | 0.959101 | WWW | 5 | TX |
2274 | TRE | 6.065479 | 0.413246 | 0.040899 | WWW | 5 | TX |
2275 | SPP | 3.538606 | 0.417402 | 0.656162 | WWW | 6 | TX |
2276 | TRE | -2.538606 | 0.582598 | 0.343838 | WWW | 6 | TX |
2277 | SPP | 5.823124 | 0.481409 | 0.577494 | WWW | 7 | TX |
2278 | TRE | -4.823124 | 0.518591 | 0.422506 | WWW | 7 | TX |
2279 | SPP | 2.823845 | 0.542277 | 0.564253 | WWW | 8 | TX |
2280 | TRE | -1.823845 | 0.457723 | 0.435747 | WWW | 8 | TX |
2281 | SPP | 4.395278 | 0.587122 | 0.573002 | WWW | 9 | TX |
2282 | TRE | -3.395278 | 0.412878 | 0.426998 | WWW | 9 | TX |
2283 | SPP | -12.152599 | 0.618150 | 0.499494 | WWW | 10 | TX |
2284 | TRE | 13.152599 | 0.381850 | 0.500506 | WWW | 10 | TX |
2285 | SPP | 4.745620 | 0.507283 | 0.534694 | WWW | 11 | TX |
2286 | TRE | -3.745620 | 0.492717 | 0.465306 | WWW | 11 | TX |
2287 | SPP | 3.483764 | 0.473996 | 0.558542 | WWW | 12 | TX |
2288 | TRE | -2.483764 | 0.526004 | 0.441458 | WWW | 12 | TX |
df.loc[(df.state == 'TX') &
(df['fuel category'] == 'WWW') &
(df['Reporting Frequency'] == 'A')].groupby(['NERC', 'month', 'fuel category']).sum()
year | plant id | generation (MWh) | total fuel (mmbtu) | elec fuel (mmbtu) | |||
---|---|---|---|---|---|---|---|
NERC | month | fuel category | |||||
SPP | 1 | WWW | 6045 | 158335 | 24477.085 | 1031862.0 | 270598.0 |
2 | WWW | 6045 | 158335 | 23591.727 | 1002454.0 | 260114.0 | |
3 | WWW | 6045 | 158335 | 23338.641 | 1055755.0 | 258287.0 | |
4 | WWW | 6045 | 158335 | 23981.906 | 1079494.0 | 265683.0 | |
5 | WWW | 6045 | 158335 | 22927.767 | 1020067.0 | 253919.0 | |
6 | WWW | 6045 | 158335 | 19081.413 | 824972.0 | 210964.0 | |
7 | WWW | 6045 | 158335 | 22039.664 | 994750.0 | 243501.0 | |
8 | WWW | 6045 | 158335 | 22143.524 | 1014658.0 | 243921.0 | |
9 | WWW | 6045 | 158335 | 22219.108 | 1071510.0 | 245327.0 | |
10 | WWW | 6045 | 158335 | 15374.204 | 712726.0 | 169390.0 | |
11 | WWW | 6045 | 158335 | 22856.243 | 1012581.0 | 253042.0 | |
12 | WWW | 6045 | 158335 | 21888.202 | 964300.0 | 241652.0 | |
TRE | 1 | WWW | 6045 | 75188 | -19018.764 | 883010.0 | 210695.0 |
2 | WWW | 6045 | 75188 | -17882.569 | 1112673.0 | 200997.0 | |
3 | WWW | 6045 | 75188 | -18918.668 | 975783.0 | 211310.0 | |
4 | WWW | 6045 | 75188 | -15883.681 | 467792.0 | 100523.0 | |
5 | WWW | 6045 | 75188 | -27454.045 | 718424.0 | 10828.0 | |
6 | WWW | 6045 | 75188 | -13689.060 | 1151472.0 | 110548.0 | |
7 | WWW | 6045 | 75188 | -18254.812 | 1071581.0 | 178150.0 | |
8 | WWW | 6045 | 75188 | -14301.901 | 856449.0 | 188369.0 | |
9 | WWW | 6045 | 75188 | -17163.885 | 753512.0 | 182816.0 | |
10 | WWW | 6045 | 75188 | -16639.300 | 440272.0 | 169733.0 | |
11 | WWW | 6045 | 75188 | -18039.961 | 983507.0 | 220204.0 | |
12 | WWW | 6045 | 75188 | -15605.285 | 1070104.0 | 190996.0 |
df.loc[(df.state == 'TX') &
(df['fuel category'] == 'WWW') &
(df['Reporting Frequency'] == 'A')].groupby(['NERC', 'fuel category']).sum()
year | month | plant id | generation (MWh) | total fuel (mmbtu) | elec fuel (mmbtu) | ||
---|---|---|---|---|---|---|---|
NERC | fuel category | ||||||
SPP | WWW | 72540 | 234 | 1900020 | 263919.484 | 11785129.0 | 2916398.0 |
TRE | WWW | 72540 | 234 | 902256 | -212851.931 | 10484579.0 | 1975169.0 |
NERC_states = ['WY', 'SD', 'NE', 'OK', 'TX', 'NM', 'LA', 'AR',
'MO', 'MN', 'IL', 'KY', 'VA', 'FL']
error_list = []
for state in NERC_states:
error = (annual_state.loc[2016, state]
- annual_facility.loc[2016, state]) / annual_state.loc[2016, state]
error['state'] = state
for col in ['generation (MWh)']:#, 'elec fuel (mmbtu)']:
if error.loc[error[col] > 0.05, col].any():
error_list.append(error.loc[error[col] > 0.05])
The dataframe below shows all states with more than one NERC region where facility generation is at least 5% below EIA's state-level estimate in 2016.
pd.concat(error_list)
generation (MWh) | elec fuel (mmbtu) | state | |
---|---|---|---|
COW | 0.057014 | 0.058988 | WY |
HYC | 0.095835 | NaN | WY |
NG | 0.413092 | 0.508736 | WY |
NG | 0.212550 | 0.233675 | SD |
HYC | 1.000000 | NaN | NE |
NG | 0.141895 | 0.122942 | NE |
WAS | 1.000000 | NaN | NE |
HYC | 0.327894 | NaN | OK |
OTH | 0.359396 | NaN | OK |
PEL | 0.098354 | 0.089782 | OK |
WAS | 1.000000 | NaN | OK |
WWW | 1.000000 | NaN | OK |
HYC | 0.688630 | NaN | TX |
NG | 0.053121 | 0.062912 | TX |
OOG | 0.132470 | NaN | TX |
OTH | 0.533980 | NaN | TX |
SUN | 0.247843 | NaN | TX |
WAS | 1.000000 | NaN | TX |
WWW | 0.443587 | NaN | TX |
GEO | 1.000000 | NaN | NM |
NG | 0.090467 | 0.085517 | NM |
SUN | 0.638877 | NaN | NM |
WAS | 1.000000 | NaN | NM |
NG | 0.055065 | 0.050793 | LA |
OOG | 0.100225 | NaN | LA |
OTH | 0.580154 | NaN | LA |
WAS | 0.930467 | NaN | LA |
WWW | 0.516165 | NaN | LA |
HYC | 0.202653 | NaN | AR |
WAS | 1.000000 | NaN | AR |
WWW | 0.086687 | NaN | AR |
NG | 0.089319 | 0.123133 | MO |
SUN | 0.860146 | NaN | MO |
WAS | 1.000000 | NaN | MO |
HYC | 0.942665 | NaN | MN |
NG | 0.064268 | 0.084587 | MN |
OTH | 0.555342 | NaN | MN |
SUN | 0.794821 | NaN | MN |
WAS | 0.711827 | NaN | MN |
WND | 0.112260 | NaN | MN |
WWW | 0.596931 | NaN | MN |
NG | 0.060489 | 0.059321 | IL |
OOG | 0.961357 | NaN | IL |
OTH | 0.987482 | NaN | IL |
PEL | 0.052267 | 0.059331 | IL |
SUN | 1.000000 | NaN | IL |
WAS | 1.000000 | NaN | IL |
HYC | 0.128468 | NaN | KY |
WAS | 1.000000 | NaN | KY |
HYC | 1.139389 | NaN | VA |
OTH | 0.164274 | NaN | VA |
PEL | 0.055163 | 0.055660 | VA |
WAS | 0.642315 | NaN | VA |
WWW | 0.095183 | NaN | VA |
HYC | 1.000000 | NaN | FL |
OTH | 0.318424 | NaN | FL |
SUN | 0.317072 | NaN | FL |
WAS | 0.585390 | NaN | FL |
WWW | 0.466676 | NaN | FL |