#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import pandas as pd import seaborn as sns import os import glob import numpy as np # I've been using state boundaries to define approximate NERC regions. How accurate is this? Use 2015 EIA 923 data to check. # ## Read NERC by state data # In[2]: path = os.path.join('Data storage', 'final NERC data from states', 'Monthly gen*') mg_fns = glob.glob(path) # In[3]: df_list = [] for f in mg_fns: region = f.split()[-1][:-4] df = pd.read_csv(f) df['region'] = region df_list.append(df) full_mg = pd.concat(df_list) full_mg.reset_index(inplace=True, drop=True) full_mg['datetime'] = pd.to_datetime(full_mg['datetime']) monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'], values='generation (MWh)', columns='fuel category 1') monthly_gen.reset_index(inplace=True, drop=False) monthly_gen['Year'] = monthly_gen['datetime'].dt.year monthly_gen.replace(np.nan, 0, inplace=True) # In[5]: monthly_gen.head() # In[54]: annual_gen = monthly_gen.groupby(['region', 'Year']).sum() annual_gen.reset_index(inplace=True, drop=False) # In[55]: annual_gen = annual_gen.loc[annual_gen['Year'] == 2015] annual_gen.index = annual_gen['region'] annual_gen.drop(['region', 'Year'], axis=1, inplace=True) annual_gen['Total'] = annual_gen.sum(axis=1) annual_gen # ## Read EIA-923 data # In[6]: path = os.path.join('Data storage', 'EIA923_Schedules_2_3_4_5_M_12_2015_Final.xlsx') eia = pd.read_excel(path, header=5, parse_cols='A,G,I,N,O,P,CR') # In[31]: eia.columns = [col.replace('\n', ' ') for col in eia.columns] eia.rename(columns={'Net Generation (Megawatthours)': 'Generation (MWh)', 'NERC Region': 'region'}, inplace=True) # In[32]: eia.head() # In[13]: eia['AER Fuel Type Code'].unique() # In[15]: fuel_cats = {'Coal': ['COL'], 'Natural Gas': ['NG'], 'Hydro': ['HYC', 'HPS'], 'Nuclear': ['NUC'], 'Wind': ['WND'], 'Solar': ['SUN'], 'Other Renewables': ['GEO', 'ORW', 'WWW', 'MLG'], 'Other': ['WOO', 'DFO', 'RFO', 'PC', 'OTH', 'OOG', 'WOC']} # In[22]: for cat, fuels in fuel_cats.iteritems(): eia.loc[eia['AER Fuel Type Code'].isin(fuels), 'Fuel category'] = cat # In[56]: eia_nerc = eia.groupby(['region', 'Fuel category']).sum() eia_nerc.reset_index(inplace=True) # In[57]: eia_nerc = eia_nerc.pivot_table(index='region', columns=['Fuel category'], values='Generation (MWh)') eia_nerc.rename(index={'TRE': 'ERCOT'}, inplace=True) eia_nerc['Total'] = eia_nerc.sum(axis=1) eia_nerc # ## Compare 2 data sets # In[60]: (annual_gen - eia_nerc) / eia_nerc # In[ ]: