This notebook contains material from Controlling Natural Watersheds; content is available on Github.
Retrieve historical daily datafrom National Water Information System.
# Display graphics inline with the notebook
%matplotlib inline
# Standard Python modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import datetime
import requests
# Modules to display images and data tables
from IPython.display import Image
from IPython.core.display import display
def USGS_Flow(s):
userdata = {'cb_00060':'on',
'format' : 'rdb',
'site_no' : s,
'begin_date' : '1970-01-01',
'end_date' : '2014-12-31'}
resp = requests.get('http://waterdata.usgs.gov/nwis/dv', userdata)
station_number = ' '.join(resp.text.split('\n')[14:15][0][1:].split()[0:2])
station_name = ' '.join(resp.text.split('\n')[14:15][0][1:].split()[2:])
data = [line.split('\t')[2:4] for line in resp.text.split('\n') if line[0:4]=='USGS']
data = [d for d in data if d[1]]
flow = pd.Series([float(d[1]) for d in data], index=pd.to_datetime([d[0] for d in data]))
return station_number, station_name, flow
STATION_NUMBER, STATION_NAME, FLOW = USGS_Flow('05132000')
def USGS_FlowFrequency(s):
station_number, station_name, FLOW = USGS_Flow(s)
FLOW = FLOW['1970':]
# select period of year
FLOW = FLOW.ix[FLOW.index.dayofyear >= 1] # 135]
FLOW = FLOW.ix[FLOW.index.dayofyear <= 366] # 196]
# show data
plt.figure(figsize=(12,8))
plt.subplot2grid((2,2),(0,0),colspan=2)
plt.plot(FLOW[:'1999'],color='b')
plt.hold(True)
plt.plot(FLOW['2000':],color='g')
plt.xlabel('Year')
plt.ylabel('Flow [cubic meters/second]')
plt.title(s + ': ' + station_name +
' ' + str(FLOW.index[0].year) + '-' + str(FLOW.index[-1].year))
# histogram distribution of flows
plt.subplot2grid((2,2),(1,0))
plt.hold(True)
hist,bins = np.histogram(FLOW,bins=100)
FLOW['1970':'1999'].hist(bins=bins,normed=1, color='b', alpha=0.5)
FLOW['2000':].hist(bins=bins,normed=1,color='g', alpha=0.5)
plt.ylabel('Frequency')
plt.xlabel('Flow [cubic meters/second]')
plt.legend(['1970-1999','2000-2014'])
plt.tight_layout()
# flow-frequency plots
plt.subplot2grid((2,2),(1,1))
plt.hold(True)
hist,bins = np.histogram(FLOW['1970':'1999'],bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist)
hist,bins = np.histogram(FLOW['2000':],bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist,color='g')
plt.xlim([0,plt.xlim()[1]])
plt.legend(['1970-1999','2000-2014'])
plt.title('Probability of Exceeding a Given Flowrate')
plt.xlabel('Flow [cubic feet/sec]')
plt.hold(False)
plt.grid()
plt.tight_layout()
fname = '../images/FlowFrequency_' + s + '.png'
plt.savefig(fname)
!convert $fname -trim $fname
USGS_FlowFrequency('05131500')
USGS_FlowFrequency('05129115')
USGS_FlowFrequency('05132000')
# Read data files as a pandas dataframes
STATIONS = pd.read_csv(HYDAT_dir + 'STATIONS.csv', index_col = 0);
DLY_LEVELS = pd.read_csv(HYDAT_dir + 'DLY_LEVELS.csv')
DLY_FLOWS = pd.read_csv(HYDAT_dir + 'DLY_FLOWS.csv')
print " Stations in the HYDAT database = ", len(STATIONS.index)
Stations in the HYDAT database = 7775
The following cell creates a pandas dataframe of monitoring stations from the STATIONS.csv table extracted from the HYDAT database. The extaction searches for all stations with a specified region bounded by latitude and longitudes.
For reference, this is a map of the Rainy River drainage basin available from the International Joint Commission.
# bounding region
lngW = -94.0
lngE = -90.5
latS = 47.5
latN = 49.0
# locate monitoring stations within bounding region
stationList = STATIONS[(STATIONS['LATITUDE'] >= latS) & \
(STATIONS['LATITUDE'] <= latN) & \
(STATIONS['LONGITUDE'] <= lngE) & \
(STATIONS['LONGITUDE'] >= lngW)].index
# reduce data sets to the area of interest
WSC_STATIONS = STATIONS.ix[stationList]
DLY_LEVELS = DLY_LEVELS.ix[[idx for idx in DLY_LEVELS.index if DLY_LEVELS.ix[idx,'STATION_NUMBER'] in stationList]]
DLY_FLOWS = DLY_FLOWS.ix[[idx for idx in DLY_FLOWS.index if DLY_FLOWS.ix[idx,'STATION_NUMBER'] in stationList]]
# add columns to STATIONS for level and flow stations
WSC_STATIONS['Level'] = ''
WSC_STATIONS['Flow'] = ''
# mark level and flow stations
WSC_STATIONS.ix[set(DLY_LEVELS['STATION_NUMBER']).intersection(WSC_STATIONS.index),'Level'] = True
WSC_STATIONS.ix[set(DLY_FLOWS['STATION_NUMBER']).intersection(WSC_STATIONS.index),'Flow'] = True
print "Stations within region of interest = ", len(WSC_STATIONS.index)
print "DLY_FLOWS records = ", len(DLY_FLOWS.index)
print "DLY_LEVELS records = ", len(DLY_LEVELS.index)
display(WSC_STATIONS.ix[:,['Level','Flow','STATION_NAME','LATITUDE','LONGITUDE',]])
WSC_STATIONS.to_pickle('../data/WSC_STATIONS')
Stations within region of interest = 34 DLY_FLOWS records = 7965 DLY_LEVELS records = 5913
Level | Flow | STATION_NAME | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
STATION_NUMBER | |||||
05PA001 | True | KETTLE RIVER ABOVE KETTLE FALLS | 48.49444 | -92.64444 | |
05PA003 | True | NAMAKAN LAKE ABOVE KETTLE FALLS DAM | 48.50000 | -92.63886 | |
05PA005 | True | NORTHERN LIGHT LAKE AT OUTLET | 48.24028 | -90.76889 | |
05PA006 | True | True | NAMAKAN RIVER AT OUTLET OF LAC LA CROIX | 48.38256 | -92.17631 |
05PA007 | True | CROOKED LAKE NEAR CURTAIN FALLS | 48.23750 | -91.90611 | |
05PA010 | True | FRENCH LAKE NEAR ATIKOKAN | 48.67222 | -91.13500 | |
05PA011 | True | LAC LA CROIX AT CAMPBELL'S CAMP | 48.35508 | -92.21744 | |
05PA012 | True | True | BASSWOOD RIVER NEAR WINTON | 48.08256 | -91.65117 |
05PA013 | True | NAMAKAN LAKE AT SQUIRREL ISLAND | 48.49686 | -92.65856 | |
05PB001 | True | SEINE RIVER NEAR LA SEINE | 48.75000 | -92.20000 | |
05PB002 | True | LITTLE TURTLE LAKE NEAR MINE CENTRE | 48.77222 | -92.60833 | |
05PB003 | True | MANITOU RIVER ABOVE DEVIL'S CASCADE | 48.97500 | -93.34167 | |
05PB004 | True | FOOTPRINT RIVER AT RAINY LAKE FALLS | 48.85833 | -93.57500 | |
05PB007 | True | RAINY LAKE NEAR FORT FRANCES | 48.64912 | -93.32068 | |
05PB009 | True | SEINE RIVER AT STURGEON FALLS GENERATING STATION | 48.74444 | -92.28472 | |
05PB012 | True | LAC DES MILLE LACS ABOVE OUTLET DAM | 48.98000 | -90.73000 | |
05PB014 | True | True | TURTLE RIVER NEAR MINE CENTRE | 48.85022 | -92.72383 |
05PB015 | True | PIPESTONE RIVER ABOVE RAINY LAKE | 48.56861 | -92.52417 | |
05PB018 | True | True | ATIKOKAN RIVER AT ATIKOKAN | 48.75197 | -91.58408 |
05PB019 | True | NORTHEAST TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN | 48.95083 | -91.71222 | |
05PB020 | True | EASTERN TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN | 48.94056 | -91.69833 | |
05PB021 | True | EYE RIVER NEAR HARDTACK LAKE NORTH OF ATIKOKAN | 48.92500 | -91.66222 | |
05PB022 | True | EYE RIVER NEAR COULSON LAKE NORTH OF ATIKOKAN | 48.89444 | -91.66750 | |
05PB023 | True | RAINY LAKE AT NORTHWEST BAY | 48.84167 | -93.62000 | |
05PB024 | True | RAINY LAKE NEAR BEAR PASS | 48.70058 | -92.95800 | |
05PB025 | True | RAINY LAKE AT STOKES BAY | 48.53611 | -92.56111 | |
05PC009 | True | LA VALLEE RIVER AT LA VALLEE | 48.62083 | -93.62500 | |
05PC010 | True | STURGEON RIVER NEAR BARWICK | 48.68750 | -93.98333 | |
05PC016 | True | LA VALLEE RIVER NEAR DEVLIN | 48.59028 | -93.67278 | |
05PC018 | True | True | RAINY RIVER AT MANITOU RAPIDS | 48.63447 | -93.91336 |
05PC019 | True | RAINY RIVER AT FORT FRANCES | 48.60853 | -93.40344 | |
05PC022 | True | True | LA VALLEE RIVER NEAR BURRISS | 48.67844 | -93.66522 |
05PC024 | True | RAINY RIVER AT PITHERS POINT SITE NO.1 | 48.61389 | -93.35472 | |
05PC025 | True | RAINY RIVER AT PITHERS POINT SITE NO.2 | 48.61625 | -93.35992 |
The following function maps a list of stations identified by their station numbers. In extracts latitude and longitude from the STATIONS table, then calls the google maps web api to create a map image.
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
def mapWSC(stationList,zoom=8):
# returns a .png map image
locs = ["{0},{1}".format(WSC_STATIONS.ix[s,'LATITUDE'], WSC_STATIONS.ix[s,'LONGITUDE']) \
for s in stationList]
flows = [s for s in stationList if WSC_STATIONS.ix[s,'Flow'] == True]
levels = [s for s in stationList if WSC_STATIONS.ix[s,'Level'] == True]
rSet = set(levels).difference(set(flows))
gSet = set(flows).difference(set(levels))
bSet = set(levels).intersection(set(flows))
google_maps_url = \
"https://maps.googleapis.com/maps/api/staticmap?" + \
"size=640x400" + \
"&zoom={:d}".format(zoom) + \
"&maptype=terrain" + \
"&markers=color:red%7Csize:mid%7C" + \
"|".join(["{0},{1}".format(WSC_STATIONS.ix[s,'LATITUDE'], \
WSC_STATIONS.ix[s,'LONGITUDE']) for s in rSet]) + \
"&markers=color:green%7Csize:mid%7C" + \
"|".join(["{0},{1}".format(WSC_STATIONS.ix[s,'LATITUDE'], \
WSC_STATIONS.ix[s,'LONGITUDE']) for s in gSet]) + \
"&markers=color:blue%7Csize:mid%7C" + \
"|".join(["{0},{1}".format(WSC_STATIONS.ix[s,'LATITUDE'], \
WSC_STATIONS.ix[s,'LONGITUDE']) for s in bSet])
return Image(requests.get(google_maps_url).content)
display(mapWSC(WSC_STATIONS.index))
def getLevelsWSC(s):
'''Return a time series for levels at a given station.'''
ts = {}
data = DLY_LEVELS[DLY_LEVELS['STATION_NUMBER'] == s]
for k in data.index:
mo = str(data.ix[k,'MONTH'])
yr = str(data.ix[k,'YEAR'])
for n in range(1,data.ix[k,'NO_DAYS']+1):
ts[pd.to_datetime(mo+'/'+str(n)+'/'+yr)] = data.ix[k,'LEVEL'+str(n)]
ts = pd.Series(ts)
#drop initial and terminal null entries
j = 0
while j<len(ts.index) and pd.isnull(ts.ix[j]):
j += 1
k = len(ts.index) - 1
while k>=j and pd.isnull(ts.ix[k]):
k += -1
return ts[j:k+1]
WSC_LEVELS = pd.DataFrame({s:getLevelsWSC(s) for s in WSC_STATIONS[WSC_STATIONS['Level']==True].index})
WSC_LEVELS.to_pickle('../data/WSC_LEVELS')
WSC_LEVELS.head()
05PA003 | 05PA005 | 05PA006 | 05PA007 | 05PA010 | 05PA011 | 05PA012 | 05PA013 | 05PB002 | 05PB007 | 05PB012 | 05PB014 | 05PB018 | 05PB023 | 05PB024 | 05PB025 | 05PC018 | 05PC022 | 05PC024 | 05PC025 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1911-08-20 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 336.868 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1911-08-21 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 336.880 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1911-08-22 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 336.865 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1911-08-23 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 336.853 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1911-08-24 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 336.844 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
The DLY_FLOW data is encoded in an irregular tabular format where rows are indexed by station code, year, and month, and columns are indexed by date. Given a station code, the following function decodes DLY_FLOW to produce a pandas times series of flow rates. The function is used to create a pandas dataframe for all flow stations in WSC_STATIONS, then pickles the results to a data file for use by other notebooks in this repository.
def getFlowsWSC(s):
ts = {}
data = DLY_FLOWS[DLY_FLOWS['STATION_NUMBER'] == s]
for k in data.index:
mo = str(data.ix[k,'MONTH'])
yr = str(data.ix[k,'YEAR'])
for n in range(1,data.ix[k,'NO_DAYS']+1):
ts[pd.to_datetime(mo+'/'+str(n)+'/'+yr)] = data.ix[k,'FLOW'+str(n)]
ts = pd.Series(ts)
ts.name = s +': ' + STATIONS.ix[s,'STATION_NAME'] + \
' from ' + '{0}'.format(ts.index[0].year) + \
' to ' + '{0}'.format(ts.index[-1].year)
#drop initial and terminal null entries
j = 0
while j<len(ts.index) and pd.isnull(ts.ix[j]):
j += 1
k = len(ts.index) - 1
while k>=j and pd.isnull(ts.ix[k]):
k += -1
return ts[j:k+1]
WSC_FLOWS = pd.DataFrame({s: getFlowsWSC(s) for s in WSC_STATIONS[WSC_STATIONS['Flow']==True].index})
WSC_FLOWS.to_pickle('../data/WSC_FLOWS')
WSC_FLOWS.head()
05PA001 | 05PA006 | 05PA012 | 05PB001 | 05PB003 | 05PB004 | 05PB009 | 05PB014 | 05PB015 | 05PB018 | 05PB019 | 05PB020 | 05PB021 | 05PB022 | 05PC009 | 05PC010 | 05PC016 | 05PC018 | 05PC019 | 05PC022 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1905-10-25 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 399.0 | NaN |
1905-10-26 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 405.0 | NaN |
1905-10-27 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 396.0 | NaN |
1905-10-28 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 394.0 | NaN |
1905-10-29 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 391.0 | NaN |
The HYDAT database is a collection of data associated with monitoring stations located throughout Canada. The STATIONS table contains a list of stations and attributes, including the latitude and longitude of their position. As an example, here we list attributes for 05PB007, a station monitoring the level of Rainy Lake near Fort Frances, Ontario.
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
display(WSC_STATIONS.ix['05PB007'])
display(WSC_STATIONS.ix['05PC018'])
STATION_NAME RAINY LAKE NEAR FORT FRANCES PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.6491 LONGITUDE -93.3207 DRAINAGE_AREA_GROSS NaN DRAINAGE_AREA_EFFECT NaN RHBN 0 REAL_TIME 1 CONTRIBUTOR_ID 647 OPERATOR_ID 647 DATUM_ID 100 Level True Flow Name: 05PB007, dtype: object
STATION_NAME RAINY RIVER AT MANITOU RAPIDS PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.6345 LONGITUDE -93.9134 DRAINAGE_AREA_GROSS 50200 DRAINAGE_AREA_EFFECT NaN RHBN 0 REAL_TIME 0 CONTRIBUTOR_ID 647 OPERATOR_ID 600 DATUM_ID 10 Level True Flow True Name: 05PC018, dtype: object
%matplotlib notebook
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_LEVELS = pd.read_pickle('../data/WSC_LEVELS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
s = '05PA003'
display(mapWSC([s]))
plt.figure(figsize=(10,4))
WSC_LEVELS[s].dropna().plot()
yrA = str(WSC_LEVELS[s].dropna().index[0].year)
yrB = str(WSC_LEVELS[s].dropna().index[-1].year)
plt.title(WSC_STATIONS.ix[s,'STATION_NAME'] + ' ' + yrA + '-' + yrB)
plt.ylabel('Meters')
s = '05PC019'
display(mapWSC([s],12))
plt.figure(figsize=(10,4))
WSC_FLOWS[s].dropna().plot()
yrA = str(WSC_FLOWS[s].dropna().index[0].year)
yrB = str(WSC_FLOWS[s].dropna().index[-1].year)
plt.title(WSC_STATIONS.ix[s,'STATION_NAME'] + ' ' + yrA + '-' + yrB)
plt.ylabel('Cubic Meters per Second')
<matplotlib.text.Text at 0x1355c8310>
Use of this function is demonstrated by reading and plotting the history of lake levels for Rainy and Namakan Lakes.
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_LEVELS = pd.read_pickle('../data/WSC_LEVELS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
plt.figure(figsize=(10,6))
plt.hold(True)
WSC_LEVELS['05PB007'].plot(color='blue') # RL at Fort Frances
WSC_LEVELS['05PA003'].plot(color='green') # NL at Kettle Falls
WSC_LEVELS['05PA013'].plot(color='green') # NL at Squirrel Island
plt.hold(False)
plt.legend(['Rainy Lake','Namakan Reservoir']);
plt.title('History of Rainy Lake and Namakan Lake Levels, 1911-')
plt.grid()
The use of this function is demonstrated by creating a historgram of flows on Rainy River in the period
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_LEVELS = pd.read_pickle('../data/WSC_LEVELS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
RR = WSC_FLOWS['05PC019']
RM = WSC_FLOWS['05PC018']
plt.figure(figsize=(10,8))
plt.subplot(2,1,1)
plt.hold(True)
RR.plot(alpha=0.5)
RM.plot(alpha=0.5)
plt.legend([WSC_STATIONS.ix[RR.name,'STATION_NAME'],WSC_STATIONS.ix[RM.name,'STATION_NAME']])
plt.hold(False)
plt.subplot(2,1,2)
plt.hold(True)
RR.hist(bins=100,alpha=0.5)
RM.hist(bins=100,alpha=0.5)
plt.hold(False)
plt.legend([RR.name,RM.name])
<matplotlib.legend.Legend at 0x138ea8410>
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
A = '05PC019' # Rainy River at Ft. Frances
B = '05PC018' # Rainy River at Manitou Rapids
FLOW = (WSC_FLOWS[B] - WSC_FLOWS[A]).dropna()
plt.figure(figsize=(10,6))
FLOW['1970':].plot()
plt.xlabel('Year')
plt.ylabel('Flow [cubic meters/second]')
plt.title('Difference in Flow on Rainy River between Manitou Rapids and Ft. Frances')
<matplotlib.text.Text at 0x1bc216bd0>
plt.figure(figsize=(10,8))
plt.subplot(2,1,1)
FLOW['1970':'1999'].hist(bins=100,normed=1)
plt.ylim([0,.015])
plt.xlim([-100,1400])
plt.subplot(2,1,2)
FLOW['2000':].hist(bins=100,normed=1)
plt.ylim([0,.015])
plt.xlim([-100,1400])
(-100, 1400)
plt.figure(figsize=(10,6))
FLOW = WSC_FLOWS[B] - WSC_FLOWS[A]
plt.hold(True)
hist,bins = np.histogram([q for q in FLOW['1970':'2000'] if pd.notnull(q)],
bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist)
plt.hold(True)
hist,bins = np.histogram([q for q in FLOW['2000':] if pd.notnull(q)],
bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist)
plt.xlim([0,plt.xlim()[1]])
plt.xlim([0,plt.xlim()[1]])
plt.legend(['1970-1999','2000-2014'])
plt.ylabel('Probability of Exceeding a Given Flowrate')
plt.xlabel('Flow [cubic meters/sec]')
plt.title('Frequency-Flow Diagram for Inflows to Rainy River between Ft. Frances and Manitour Rapids')
plt.hold(False)
plt.grid()
# read data sets
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
WSC_STATIONS.loc[WSC_STATIONS['Flow']==True]['STATION_NAME']
STATION_NUMBER 05PA001 KETTLE RIVER ABOVE KETTLE FALLS 05PA006 NAMAKAN RIVER AT OUTLET OF LAC LA CROIX 05PA012 BASSWOOD RIVER NEAR WINTON 05PB001 SEINE RIVER NEAR LA SEINE 05PB003 MANITOU RIVER ABOVE DEVIL'S CASCADE 05PB004 FOOTPRINT RIVER AT RAINY LAKE FALLS 05PB009 SEINE RIVER AT STURGEON FALLS GENERATING STATION 05PB014 TURTLE RIVER NEAR MINE CENTRE 05PB015 PIPESTONE RIVER ABOVE RAINY LAKE 05PB018 ATIKOKAN RIVER AT ATIKOKAN 05PB019 NORTHEAST TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN 05PB020 EASTERN TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN 05PB021 EYE RIVER NEAR HARDTACK LAKE NORTH OF ATIKOKAN 05PB022 EYE RIVER NEAR COULSON LAKE NORTH OF ATIKOKAN 05PC009 LA VALLEE RIVER AT LA VALLEE 05PC010 STURGEON RIVER NEAR BARWICK 05PC016 LA VALLEE RIVER NEAR DEVLIN 05PC018 RAINY RIVER AT MANITOU RAPIDS 05PC019 RAINY RIVER AT FORT FRANCES 05PC022 LA VALLEE RIVER NEAR BURRISS Name: STATION_NAME, dtype: object
stationList = ['05PA006','05PA012','05PB001','05PB003','05PB004','05PB009',
'05PB014','05PB015','05PB018','05PB019','05PB020','05PB021',
'05PB022','05PC009','05PC010','05PC016','05PC022']
for s in stationList:
print s,
print "{0:50s}".format(WSC_STATIONS.loc[s]['STATION_NAME']),
print WSC_FLOWS[s].dropna().index[0].year,
print WSC_FLOWS[s].dropna().index[-1].year
stationList = ['05PA006','05PA012','05PB014','05PB018']
display(mapWSC(stationList))
for s in stationList:
print s,
print "{0:50s}".format(WSC_STATIONS.loc[s]['STATION_NAME']),
print WSC_FLOWS[s].dropna().index[0].year,
print WSC_FLOWS[s].dropna().index[-1].year
for s in stationList:
plt.figure(figsize=(10,4))
WSC_FLOWS[s].plot()
plt.title(s + ': ' + WSC_STATIONS.loc[s]['STATION_NAME'])
05PA006 NAMAKAN RIVER AT OUTLET OF LAC LA CROIX 1921 2014 05PA012 BASSWOOD RIVER NEAR WINTON 1924 2010 05PB001 SEINE RIVER NEAR LA SEINE 1914 1926 05PB003 MANITOU RIVER ABOVE DEVIL'S CASCADE 1914 1915 05PB004 FOOTPRINT RIVER AT RAINY LAKE FALLS 1914 1916 05PB009 SEINE RIVER AT STURGEON FALLS GENERATING STATION 1963 2014 05PB014 TURTLE RIVER NEAR MINE CENTRE 1914 2014 05PB015 PIPESTONE RIVER ABOVE RAINY LAKE 1963 1998 05PB018 ATIKOKAN RIVER AT ATIKOKAN 1978 2014 05PB019 NORTHEAST TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN 1986 1986 05PB020 EASTERN TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN 1986 1986 05PB021 EYE RIVER NEAR HARDTACK LAKE NORTH OF ATIKOKAN 1985 1994 05PB022 EYE RIVER NEAR COULSON LAKE NORTH OF ATIKOKAN 1985 1993 05PC009 LA VALLEE RIVER AT LA VALLEE 1952 1959 05PC010 STURGEON RIVER NEAR BARWICK 1952 1986 05PC016 LA VALLEE RIVER NEAR DEVLIN 1960 1986 05PC022 LA VALLEE RIVER NEAR BURRISS 2007 2014
05PA006 NAMAKAN RIVER AT OUTLET OF LAC LA CROIX 1921 2014 05PA012 BASSWOOD RIVER NEAR WINTON 1924 2010 05PB014 TURTLE RIVER NEAR MINE CENTRE 1914 2014 05PB018 ATIKOKAN RIVER AT ATIKOKAN 1978 2014
def WSC_FlowFrequency(s):
display(WSC_STATIONS.ix[s])
display(mapWSC([s],10))
FLOW = WSC_FLOWS[s]['1970':].dropna()
# select period of year
FLOW = FLOW.ix[FLOW.index.dayofyear >= 1] # 135]
FLOW = FLOW.ix[FLOW.index.dayofyear <= 366] # 196]
# show data
plt.figure(figsize=(12,8))
plt.subplot2grid((2,2),(0,0),colspan=2)
FLOW[:'1999'].plot(color='b')
plt.hold(True)
FLOW['2000':].plot(color='g')
plt.xlabel('Year')
plt.ylabel('Flow [cubic meters/second]')
plt.title(s + ': ' + WSC_STATIONS.ix[s,'STATION_NAME'] +
' ' + str(FLOW.index[0].year) + '-' + str(FLOW.index[-1].year))
# histogram distribution of flows
plt.subplot2grid((2,2),(1,0))
plt.hold(True)
hist,bins = np.histogram(FLOW,bins=100)
FLOW['1970':'1999'].hist(bins=bins,normed=1, color='b', alpha=0.5)
FLOW['2000':].hist(bins=bins,normed=1,color='g', alpha=0.5)
plt.ylabel('Frequency')
plt.xlabel('Flow [cubic meters/second]')
plt.legend(['1970-1999','2000-2014'])
plt.tight_layout()
# flow-frequency plots
plt.subplot2grid((2,2),(1,1))
plt.hold(True)
hist,bins = np.histogram(FLOW['1970':'1999'],bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist)
hist,bins = np.histogram(FLOW['2000':],bins = 100)
chist = np.cumsum(hist[::-1])[::-1]/float(sum(hist))
plt.semilogy(bins[1:],chist,color='g')
plt.xlim([0,plt.xlim()[1]])
plt.legend(['1970-1999','2000-2014'])
plt.title('Probability of Exceeding a Given Flowrate')
plt.xlabel('Flow [cubic meters/sec]')
plt.hold(False)
plt.grid()
plt.tight_layout()
fname = '../images/FlowFrequency_' + s + '.png'
plt.savefig(fname)
!convert $fname -trim $fname
for s in stationList:
WSC_FlowFrequency(s)
STATION_NAME NAMAKAN RIVER AT OUTLET OF LAC LA CROIX PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.3826 LONGITUDE -92.1763 DRAINAGE_AREA_GROSS 13400 DRAINAGE_AREA_EFFECT NaN RHBN 0 REAL_TIME 1 CONTRIBUTOR_ID 647 OPERATOR_ID 647 DATUM_ID 95 Level True Flow True Name: 05PA006, dtype: object
STATION_NAME BASSWOOD RIVER NEAR WINTON PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.0826 LONGITUDE -91.6512 DRAINAGE_AREA_GROSS 4510 DRAINAGE_AREA_EFFECT NaN RHBN 0 REAL_TIME 0 CONTRIBUTOR_ID 647 OPERATOR_ID 600 DATUM_ID 10 Level True Flow True Name: 05PA012, dtype: object
STATION_NAME TURTLE RIVER NEAR MINE CENTRE PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.8502 LONGITUDE -92.7238 DRAINAGE_AREA_GROSS 4767.74 DRAINAGE_AREA_EFFECT NaN RHBN 1 REAL_TIME 1 CONTRIBUTOR_ID 647 OPERATOR_ID 647 DATUM_ID 10 Level True Flow True Name: 05PB014, dtype: object
STATION_NAME ATIKOKAN RIVER AT ATIKOKAN PROV_TERR_STATE_LOC ON REGIONAL_OFFICE_ID 5 HYD_STATUS A SED_STATUS NaN LATITUDE 48.752 LONGITUDE -91.5841 DRAINAGE_AREA_GROSS 358.02 DRAINAGE_AREA_EFFECT NaN RHBN 0 REAL_TIME 1 CONTRIBUTOR_ID 480 OPERATOR_ID 647 DATUM_ID 129 Level True Flow True Name: 05PB018, dtype: object
WSC_STATIONS = pd.read_pickle('../data/WSC_STATIONS')
WSC_LEVELS = pd.read_pickle('../data/WSC_LEVELS')
WSC_FLOWS = pd.read_pickle('../data/WSC_FLOWS')
FLOW = WSC_FLOWS['05PC019']
plt.figure(figsize=(10,8))
plt.subplot(1,1,1)
plt.hold(True)
yrA = str(WSC_FLOWS[s].dropna().index[0].year)
yrB = str(WSC_FLOWS[s].dropna().index[-1].year)
plt.title(WSC_STATIONS.ix[s,'STATION_NAME']
+ ' ' + str(WSC_FLOWS[s].dropna().index[0].year)
+ '-' + str(WSC_FLOWS[s].dropna().index[-1].year))
for (yr,r) in FLOW['1971':'1999'].groupby(FLOW['1971':'1999'].index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.cumsum().plot()
plt.figure(figsize=(10,8))
FLOW['1970':'1999'].plot()
plt.grid()
plt.hold(False)
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))
#fname = img + 'RuleCurvePerformance1970-1999.png'
#plt.savefig(fname)
#!convert $fname -trim $fname
plt.figure(figsize=(12,5))
F = FLOW['1970':].groupby(FLOW['1970':].index.year).mean()
import statsmodels.api as sm
x = F.index
y = F.values
x = sm.add_constant(x)
est = sm.OLS(y,x).fit()
est.summary()
ypred = est.params[0] + F.index*est.params[1]
plt.hold(True)
ax = F.plot(kind='bar',color='b',alpha=0.5)
ax.plot(ax.get_xticks(),ypred,'r--')
plt.hold(False)
plt.title('Mean Annual Flow on Rainy River at Ft. Frances')
plt.xlabel('Year')
plt.ylabel('Cubic Meters per Second')
plt.tight_layout()
[x,y,ypred]
[array([[ 1.00000000e+00, 1.97000000e+03], [ 1.00000000e+00, 1.97100000e+03], [ 1.00000000e+00, 1.97200000e+03], [ 1.00000000e+00, 1.97300000e+03], [ 1.00000000e+00, 1.97400000e+03], [ 1.00000000e+00, 1.97500000e+03], [ 1.00000000e+00, 1.97600000e+03], [ 1.00000000e+00, 1.97700000e+03], [ 1.00000000e+00, 1.97800000e+03], [ 1.00000000e+00, 1.97900000e+03], [ 1.00000000e+00, 1.98000000e+03], [ 1.00000000e+00, 1.98100000e+03], [ 1.00000000e+00, 1.98200000e+03], [ 1.00000000e+00, 1.98300000e+03], [ 1.00000000e+00, 1.98400000e+03], [ 1.00000000e+00, 1.98500000e+03], [ 1.00000000e+00, 1.98600000e+03], [ 1.00000000e+00, 1.98700000e+03], [ 1.00000000e+00, 1.98800000e+03], [ 1.00000000e+00, 1.98900000e+03], [ 1.00000000e+00, 1.99000000e+03], [ 1.00000000e+00, 1.99100000e+03], [ 1.00000000e+00, 1.99200000e+03], [ 1.00000000e+00, 1.99300000e+03], [ 1.00000000e+00, 1.99400000e+03], [ 1.00000000e+00, 1.99500000e+03], [ 1.00000000e+00, 1.99600000e+03], [ 1.00000000e+00, 1.99700000e+03], [ 1.00000000e+00, 1.99800000e+03], [ 1.00000000e+00, 1.99900000e+03], [ 1.00000000e+00, 2.00000000e+03], [ 1.00000000e+00, 2.00100000e+03], [ 1.00000000e+00, 2.00200000e+03], [ 1.00000000e+00, 2.00300000e+03], [ 1.00000000e+00, 2.00400000e+03], [ 1.00000000e+00, 2.00500000e+03], [ 1.00000000e+00, 2.00600000e+03], [ 1.00000000e+00, 2.00700000e+03], [ 1.00000000e+00, 2.00800000e+03], [ 1.00000000e+00, 2.00900000e+03], [ 1.00000000e+00, 2.01000000e+03], [ 1.00000000e+00, 2.01100000e+03], [ 1.00000000e+00, 2.01200000e+03], [ 1.00000000e+00, 2.01300000e+03], [ 1.00000000e+00, 2.01400000e+03]]), array([ 426.35342466, 414.08219178, 299.42622951, 256.78082192, 401.43013699, 288.35342466, 202.9215847 , 288.38438356, 356. , 291.44054795, 148.82650273, 224.21945205, 358.79178082, 279.67671233, 255.98032787, 468.4739726 , 306.23287671, 155.02684932, 247.99918033, 278.46 , 244.00684932, 185.6890411 , 347.94535519, 298.38356164, 294.30054795, 235.71041096, 452.01939891, 238.29753425, 104.80219178, 329.28575342, 259.79180328, 455.07917808, 275.28438356, 108.84931507, 298.98087432, 361.42958904, 194.25589041, 223.01260274, 387.61557377, 389.92054795, 209.52821918, 217.68712329, 216.34849315, 301.61236264, 406.85589041]), Float64Index([305.557368702, 304.78511749, 304.012866279, 303.240615067, 302.468363855, 301.696112643, 300.923861431, 300.151610219, 299.379359007, 298.607107796, 297.834856584, 297.062605372, 296.29035416, 295.518102948, 294.745851736, 293.973600524, 293.201349313, 292.429098101, 291.656846889, 290.884595677, 290.112344465, 289.340093253, 288.567842042, 287.79559083, 287.023339618, 286.251088406, 285.478837194, 284.706585982, 283.93433477, 283.162083559, 282.389832347, 281.617581135, 280.845329923, 280.073078711, 279.300827499, 278.528576287, 277.756325076, 276.984073864, 276.211822652, 275.43957144, 274.667320228, 273.895069016, 273.122817804, 272.350566593, 271.578315381], dtype='float64')]
ypred = est.params[0] + (F.index - 2000)*est.params[1]
ypred
Float64Index([0.964273161183, 0.96183014142, 0.959387121657, 0.956944101894, 0.954501082131, 0.952058062368, 0.949615042606, 0.947172022843, 0.94472900308, 0.942285983317, 0.939842963554, 0.937399943791, 0.934956924028, 0.932513904266, 0.930070884503, 0.92762786474, 0.925184844977, 0.922741825214, 0.920298805451, 0.917855785689, 0.915412765926, 0.912969746163, 0.9105267264, 0.908083706637, 0.905640686874, 0.903197667111, 0.900754647349, 0.898311627586, 0.895868607823, 0.89342558806, 0.890982568297, 0.888539548534, 0.886096528772, 0.883653509009, 0.881210489246, 0.878767469483, 0.87632444972, 0.873881429957, 0.871438410194, 0.868995390432, 0.866552370669, 0.864109350906, 0.861666331143, 0.85922331138, 0.856780291617], dtype='float64')
regressors = sm.tools.tools.add_constant(x)
r = sm.OLS(y,regressors).fit()
print r.summary()
OLS Regression Results ============================================================================== Dep. Variable: 05PC019 R-squared: 0.013 Model: OLS Adj. R-squared: -0.010 Method: Least Squares F-statistic: 0.5558 Date: Thu, 04 Aug 2016 Prob (F-statistic): 0.460 Time: 14:59:34 Log-Likelihood: -6.4195 No. Observations: 45 AIC: 16.84 Df Residuals: 43 BIC: 20.45 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ const 5.7770 6.528 0.885 0.381 -7.388 18.942 x1 -0.0024 0.003 -0.746 0.460 -0.009 0.004 ============================================================================== Omnibus: 0.490 Durbin-Watson: 1.916 Prob(Omnibus): 0.783 Jarque-Bera (JB): 0.636 Skew: 0.155 Prob(JB): 0.728 Kurtosis: 2.508 Cond. No. 3.06e+05 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The condition number is large, 3.06e+05. This might indicate that there are strong multicollinearity or other numerical problems.
plt.figure(figsize=(10,8))
plt.subplot(2,1,1)
A = FLOW['1970':'1999']
B = FLOW['2000':]
totalflow = 24*3600*A.groupby(A.index.year).sum()/10e9
totalflow.hist(bins=20)
plt.title('1970-2000 Mean Annual Total Flow at Ft. Frances = ' + "{:5.4f}".format(totalflow.mean()) + ' Billion Cubic Meters')
plt.xlabel('Billion Cubic Meters')
plt.ylabel('Count')
plt.subplot(2,1,2)
totalflow = 24*3600*B.groupby(B.index.year).sum()/10e9
totalflow.hist(bins=20)
plt.title('2001-2014 Mean Annual Total Flow at Ft. Frances = ' + "{:5.4f}".format(totalflow.mean()) + ' Billion Cubic Meters')
plt.xlabel('Billion Cubic Meters')
plt.ylabel('Count')
plt.tight_layout()
plt.figure(figsize=(10,8))
A = FLOW['1970':'1999']
B = FLOW['2000':]
plt.subplot(2,1,1)
plt.hold(True)
for (yr,r) in A.groupby(A.index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.plot()
plt.subplot(2,1,2)
plt.hold(True)
for (yr,r) in B.groupby(B.index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.plot()
plt.tight_layout()
plt.figure(figsize=(10,8))
A = FLOW['1970':'1999']
B = FLOW['2000':]
plt.subplot(2,1,1)
plt.hold(True)
for (yr,r) in A.groupby(A.index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.cumsum().plot()
plt.subplot(2,1,2)
plt.hold(True)
for (yr,r) in B.groupby(B.index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.cumsum().plot()
plt.tight_layout()
tf.mean()
103645.49333333333
hydat.h5
Data Cache¶Reading the entire HYDAT database is a lengthy operation, and because of file size constraints, the entire data set can not be stored in a github repository or hosted on a docker server. For those reaaons, the an HDF5 data cache is created in the local repository to include selected data relevant to the Rainy/Namakan system.
t
from os.path import isfile
from os import remove
if isfile('../data/hydat.h5'):
print 'Delete old ../data/hydat.h5'
remove('../data/hydat.h5')
print 'Creating new ../data/hydat.h5'
hydat = pd.HDFStore('../data/hydat.h5')
# store data in hydat.h5
hydat['STATIONS'] = STATIONS.ix[stationList].groupby(level=0).last()
hydat['DLY_FLOWS'] = DLY_FLOWS[DLY_FLOWS['STATION_NUMBER'].isin(flowStations)]
hydat['DLY_LEVELS'] = DLY_LEVELS[DLY_LEVELS['STATION_NUMBER'].isin(levelStations)]
print hydat
hydat.close()
print hydat
/Users/jeff/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2885: PerformanceWarning: your performance may suffer as PyTables will pickle object types that it cannot map directly to c-types [inferred_type->mixed,key->block2_values] [items->['STATION_NUMBER', 'FLOW_SYMBOL1', 'FLOW_SYMBOL2', 'FLOW_SYMBOL3', 'FLOW_SYMBOL4', 'FLOW_SYMBOL5', 'FLOW_SYMBOL6', 'FLOW_SYMBOL7', 'FLOW_SYMBOL8', 'FLOW_SYMBOL9', 'FLOW_SYMBOL10', 'FLOW_SYMBOL11', 'FLOW_SYMBOL12', 'FLOW_SYMBOL13', 'FLOW_SYMBOL14', 'FLOW_SYMBOL15', 'FLOW_SYMBOL16', 'FLOW_SYMBOL17', 'FLOW_SYMBOL18', 'FLOW_SYMBOL19', 'FLOW_SYMBOL20', 'FLOW_SYMBOL21', 'FLOW_SYMBOL22', 'FLOW_SYMBOL23', 'FLOW_SYMBOL24', 'FLOW_SYMBOL25', 'FLOW_SYMBOL26', 'FLOW_SYMBOL27', 'FLOW_SYMBOL28', 'FLOW_SYMBOL29', 'FLOW_SYMBOL30', 'FLOW_SYMBOL31']] exec(code_obj, self.user_global_ns, self.user_ns) /Users/jeff/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2885: PerformanceWarning: your performance may suffer as PyTables will pickle object types that it cannot map directly to c-types [inferred_type->mixed,key->block2_values] [items->['STATION_NUMBER', 'LEVEL_SYMBOL1', 'LEVEL_SYMBOL2', 'LEVEL_SYMBOL3', 'LEVEL_SYMBOL4', 'LEVEL_SYMBOL5', 'LEVEL_SYMBOL6', 'LEVEL_SYMBOL7', 'LEVEL_SYMBOL8', 'LEVEL_SYMBOL9', 'LEVEL_SYMBOL10', 'LEVEL_SYMBOL11', 'LEVEL_SYMBOL12', 'LEVEL_SYMBOL13', 'LEVEL_SYMBOL14', 'LEVEL_SYMBOL15', 'LEVEL_SYMBOL16', 'LEVEL_SYMBOL17', 'LEVEL_SYMBOL18', 'LEVEL_SYMBOL19', 'LEVEL_SYMBOL20', 'LEVEL_SYMBOL21', 'LEVEL_SYMBOL22', 'LEVEL_SYMBOL23', 'LEVEL_SYMBOL24', 'LEVEL_SYMBOL25', 'LEVEL_SYMBOL26', 'LEVEL_SYMBOL27', 'LEVEL_SYMBOL28', 'LEVEL_SYMBOL29', 'LEVEL_SYMBOL30', 'LEVEL_SYMBOL31']] exec(code_obj, self.user_global_ns, self.user_ns)
Delete old ../data/hydat.h5 Creating new ../data/hydat.h5 <class 'pandas.io.pytables.HDFStore'> File path: ../data/hydat.h5 /DLY_FLOWS frame (shape->[7965,73]) /DLY_LEVELS frame (shape->[5913,74]) /STATIONS frame (shape->[34,14]) <class 'pandas.io.pytables.HDFStore'> File path: ../data/hydat.h5 File is CLOSED
hydat.h5
Data Cache¶from os.path import isfile
hydat = pd.HDFStore('../data/hydat.h5')
# reload from cache (to verify data round trip)
STATIONS = hydat['STATIONS']
DLY_LEVELS = hydat['DLY_LEVELS']
DLY_FLOWS = hydat['DLY_FLOWS']
print hydat
hydat.close()
print hydat
<class 'pandas.io.pytables.HDFStore'> File path: ../data/hydat.h5 /DLY_FLOWS frame (shape->[7965,73]) /DLY_LEVELS frame (shape->[5913,74]) /STATIONS frame (shape->[34,14]) <class 'pandas.io.pytables.HDFStore'> File path: ../data/hydat.h5 File is CLOSED