Historical data for lake levels and stream flows in the Rainy River watershed are available from several public sources including the HYDAT database from the Water Survey of Canada (WSC). The HYDAT database consists of a Microsoft Access .mdb file providing data on daily water flows and levels throughout Canada.
For the purposes of these notebooks, individual tables (STATIONS, DLY_FLOWS, and DLY_LEVELS) were extracted from the database as .csv files with MDB Explorer. This notebook then reads the .csv files to extract information relevant to the Rainy River basin and stores the results in this repository's data directory. Due to size constraints, only the data relevant to the Rainy River Watershed is included in the repository. Neither the HYDAT database nor the extracted .csv files are included in this repository.
This notebook is run whenever a new version of the HYDAT database becomes available, normally on a quarterly basis, or if additional flow and level stations are needed in the repository data cache.
# Standard Python modules
import pandas as pd
from IPython.core.display import display
# Location of the .csv tables previously extracted from the HYDAT database
HYDAT_dir = "../data/HYDAT/"

# Load each extracted table into a pandas dataframe.
# STATIONS is indexed by station number (first column).
STATIONS = pd.read_csv(HYDAT_dir + 'STATIONS.csv', index_col=0)
DLY_LEVELS = pd.read_csv(HYDAT_dir + 'DLY_LEVELS.csv')
DLY_FLOWS = pd.read_csv(HYDAT_dir + 'DLY_FLOWS.csv')

print(" Stations in the HYDAT database = ", len(STATIONS.index))
Stations in the HYDAT database = 7775
The following cell creates a pandas dataframe of monitoring stations from the STATIONS.csv table extracted from the HYDAT database. The extraction searches for all stations within a specified region bounded by latitudes and longitudes.
For reference, this is a map of the Rainy River drainage basin available from the International Joint Commission.
# bounding box (decimal degrees) for the region of interest
lngW = -94.0
lngE = -90.5
latS = 47.5
latN = 49.0

# boolean mask selecting stations inside the bounding box;
# parentheses replace backslash line continuations
in_region = (
    (STATIONS['LATITUDE'] >= latS)
    & (STATIONS['LATITUDE'] <= latN)
    & (STATIONS['LONGITUDE'] >= lngW)
    & (STATIONS['LONGITUDE'] <= lngE)
)

# station numbers of all monitoring stations within the bounding box
stationList = STATIONS[in_region].index
# reduce data sets to the area of interest.
# .loc replaces the deprecated .ix indexer (removed in pandas 1.0), and the
# vectorized isin() replaces the O(rows x stations) python-level membership scan.
WSC_STATIONS = STATIONS.loc[stationList]
DLY_LEVELS = DLY_LEVELS[DLY_LEVELS['STATION_NUMBER'].isin(stationList)]
DLY_FLOWS = DLY_FLOWS[DLY_FLOWS['STATION_NUMBER'].isin(stationList)]
# add boolean columns to WSC_STATIONS marking which stations report
# levels and which report flows.  Initializing to False (instead of the
# original empty string) gives a clean boolean column; '' and False have
# identical truthiness for downstream tests such as ['Level'] == True.
WSC_STATIONS['Level'] = False
WSC_STATIONS['Flow'] = False

# mark level and flow stations.  .loc replaces the deprecated .ix indexer;
# the set intersection is converted to a sorted list because .loc does not
# accept raw sets as indexers.
level_ids = sorted(set(DLY_LEVELS['STATION_NUMBER']).intersection(WSC_STATIONS.index))
flow_ids = sorted(set(DLY_FLOWS['STATION_NUMBER']).intersection(WSC_STATIONS.index))
WSC_STATIONS.loc[level_ids, 'Level'] = True
WSC_STATIONS.loc[flow_ids, 'Flow'] = True
# summarize the reduced data sets
print("Stations within region of interest = ", len(WSC_STATIONS.index))
print("DLY_FLOWS records = ", len(DLY_FLOWS.index))
print("DLY_LEVELS records = ", len(DLY_LEVELS.index))

# .loc replaces the deprecated .ix indexer (removed in pandas 1.0)
display(WSC_STATIONS.loc[:, ['Level', 'Flow', 'STATION_NAME', 'LATITUDE', 'LONGITUDE']])

# cache the station table for use by other notebooks in this repository
WSC_STATIONS.to_pickle('../data/WSC_STATIONS')
Stations within region of interest = 34 DLY_FLOWS records = 7965 DLY_LEVELS records = 5913
Level | Flow | STATION_NAME | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
STATION_NUMBER | |||||
05PA001 | True | KETTLE RIVER ABOVE KETTLE FALLS | 48.49444 | -92.64444 | |
05PA003 | True | NAMAKAN LAKE ABOVE KETTLE FALLS DAM | 48.50000 | -92.63886 | |
05PA005 | True | NORTHERN LIGHT LAKE AT OUTLET | 48.24028 | -90.76889 | |
05PA006 | True | True | NAMAKAN RIVER AT OUTLET OF LAC LA CROIX | 48.38256 | -92.17631 |
05PA007 | True | CROOKED LAKE NEAR CURTAIN FALLS | 48.23750 | -91.90611 | |
05PA010 | True | FRENCH LAKE NEAR ATIKOKAN | 48.67222 | -91.13500 | |
05PA011 | True | LAC LA CROIX AT CAMPBELL'S CAMP | 48.35508 | -92.21744 | |
05PA012 | True | True | BASSWOOD RIVER NEAR WINTON | 48.08256 | -91.65117 |
05PA013 | True | NAMAKAN LAKE AT SQUIRREL ISLAND | 48.49686 | -92.65856 | |
05PB001 | True | SEINE RIVER NEAR LA SEINE | 48.75000 | -92.20000 | |
05PB002 | True | LITTLE TURTLE LAKE NEAR MINE CENTRE | 48.77222 | -92.60833 | |
05PB003 | True | MANITOU RIVER ABOVE DEVIL'S CASCADE | 48.97500 | -93.34167 | |
05PB004 | True | FOOTPRINT RIVER AT RAINY LAKE FALLS | 48.85833 | -93.57500 | |
05PB007 | True | RAINY LAKE NEAR FORT FRANCES | 48.64912 | -93.32068 | |
05PB009 | True | SEINE RIVER AT STURGEON FALLS GENERATING STATION | 48.74444 | -92.28472 | |
05PB012 | True | LAC DES MILLE LACS ABOVE OUTLET DAM | 48.98000 | -90.73000 | |
05PB014 | True | True | TURTLE RIVER NEAR MINE CENTRE | 48.85022 | -92.72383 |
05PB015 | True | PIPESTONE RIVER ABOVE RAINY LAKE | 48.56861 | -92.52417 | |
05PB018 | True | True | ATIKOKAN RIVER AT ATIKOKAN | 48.75197 | -91.58408 |
05PB019 | True | NORTHEAST TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN | 48.95083 | -91.71222 | |
05PB020 | True | EASTERN TRIBUTARY TO DASHWA LAKE NEAR ATIKOKAN | 48.94056 | -91.69833 | |
05PB021 | True | EYE RIVER NEAR HARDTACK LAKE NORTH OF ATIKOKAN | 48.92500 | -91.66222 | |
05PB022 | True | EYE RIVER NEAR COULSON LAKE NORTH OF ATIKOKAN | 48.89444 | -91.66750 | |
05PB023 | True | RAINY LAKE AT NORTHWEST BAY | 48.84167 | -93.62000 | |
05PB024 | True | RAINY LAKE NEAR BEAR PASS | 48.70058 | -92.95800 | |
05PB025 | True | RAINY LAKE AT STOKES BAY | 48.53611 | -92.56111 | |
05PC009 | True | LA VALLEE RIVER AT LA VALLEE | 48.62083 | -93.62500 | |
05PC010 | True | STURGEON RIVER NEAR BARWICK | 48.68750 | -93.98333 | |
05PC016 | True | LA VALLEE RIVER NEAR DEVLIN | 48.59028 | -93.67278 | |
05PC018 | True | True | RAINY RIVER AT MANITOU RAPIDS | 48.63447 | -93.91336 |
05PC019 | True | RAINY RIVER AT FORT FRANCES | 48.60853 | -93.40344 | |
05PC022 | True | True | LA VALLEE RIVER NEAR BURRISS | 48.67844 | -93.66522 |
05PC024 | True | RAINY RIVER AT PITHERS POINT SITE NO.1 | 48.61389 | -93.35472 | |
05PC025 | True | RAINY RIVER AT PITHERS POINT SITE NO.2 | 48.61625 | -93.35992 |
def getLevelsWSC(s, levels=None):
    '''Return a time series for levels at a given station.

    Decodes the row-per-month DLY_LEVELS layout (columns LEVEL1..LEVEL31,
    with NO_DAYS giving the number of valid day columns) into a
    datetime-indexed pandas Series, then trims leading and trailing
    null entries.

    Parameters
    ----------
    s : str
        WSC station number (e.g. '05PB007').
    levels : pandas.DataFrame, optional
        Table in DLY_LEVELS format; defaults to the module-level
        DLY_LEVELS dataframe.
    '''
    source = DLY_LEVELS if levels is None else levels
    ts = {}
    data = source[source['STATION_NUMBER'] == s]
    for k in data.index:
        # .loc replaces the deprecated .ix indexer (removed in pandas 1.0)
        mo = str(data.loc[k, 'MONTH'])
        yr = str(data.loc[k, 'YEAR'])
        for n in range(1, data.loc[k, 'NO_DAYS'] + 1):
            ts[pd.to_datetime(mo + '/' + str(n) + '/' + yr)] = data.loc[k, 'LEVEL' + str(n)]
    # sort_index guarantees chronological order regardless of row order
    ts = pd.Series(ts).sort_index()
    # drop initial and terminal null entries; first/last_valid_index
    # replace the original manual positional scans
    first = ts.first_valid_index()
    if first is None:
        # empty or all-null series
        return ts.iloc[0:0]
    return ts.loc[first:ts.last_valid_index()]
# assemble one dataframe with a column of daily levels per level-reporting
# station, then cache it for use by other notebooks in this repository
level_stations = WSC_STATIONS[WSC_STATIONS['Level'] == True].index
WSC_LEVELS = pd.DataFrame({s: getLevelsWSC(s) for s in level_stations})
WSC_LEVELS.to_pickle('../data/WSC_LEVELS')
WSC_LEVELS.tail()
05PA003 | 05PA005 | 05PA006 | 05PA007 | 05PA010 | 05PA011 | 05PA012 | 05PA013 | 05PB002 | 05PB007 | 05PB012 | 05PB014 | 05PB018 | 05PB023 | 05PB024 | 05PB025 | 05PC018 | 05PC022 | 05PC024 | 05PC025 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2014-12-27 | NaN | NaN | 360.451 | NaN | NaN | NaN | NaN | 339.987 | NaN | NaN | NaN | 0.915 | 386.296 | 8.433 | 337.361 | NaN | NaN | 10.775 | 337.325 | 337.324 |
2014-12-28 | NaN | NaN | 360.449 | NaN | NaN | NaN | NaN | 339.981 | NaN | NaN | NaN | 0.913 | 386.287 | 8.426 | 337.356 | NaN | NaN | 10.779 | 337.318 | 337.317 |
2014-12-29 | NaN | NaN | 360.447 | NaN | NaN | NaN | NaN | 339.975 | NaN | NaN | NaN | 0.910 | 386.295 | 8.421 | 337.351 | NaN | NaN | 10.788 | 337.313 | 337.312 |
2014-12-30 | NaN | NaN | 360.445 | NaN | NaN | NaN | NaN | 339.968 | NaN | NaN | NaN | 0.907 | 386.297 | 8.418 | 337.346 | NaN | NaN | 10.796 | 337.308 | 337.307 |
2014-12-31 | NaN | NaN | 360.444 | NaN | NaN | NaN | NaN | 339.962 | NaN | NaN | NaN | 0.904 | 386.303 | 8.418 | 337.341 | NaN | NaN | 10.795 | 337.303 | 337.301 |
The DLY_FLOW data is encoded in an irregular tabular format where rows are indexed by station code, year, and month, and columns are indexed by date. Given a station code, the following function decodes DLY_FLOW to produce a pandas time series of flow rates. The function is used to create a pandas dataframe for all flow stations in WSC_STATIONS, then pickles the results to a data file for use by other notebooks in this repository.
def getFlowsWSC(s, flows=None, stations=None):
    '''Return a time series for flows at a given station.

    Decodes the row-per-month DLY_FLOWS layout (columns FLOW1..FLOW31,
    with NO_DAYS giving the number of valid day columns) into a
    datetime-indexed pandas Series named after the station, then trims
    leading and trailing null entries.

    Parameters
    ----------
    s : str
        WSC station number (e.g. '05PC019').
    flows : pandas.DataFrame, optional
        Table in DLY_FLOWS format; defaults to the module-level
        DLY_FLOWS dataframe.
    stations : pandas.DataFrame, optional
        Station metadata table indexed by station number; defaults to
        the module-level STATIONS dataframe.
    '''
    source = DLY_FLOWS if flows is None else flows
    info = STATIONS if stations is None else stations
    ts = {}
    data = source[source['STATION_NUMBER'] == s]
    for k in data.index:
        # .loc replaces the deprecated .ix indexer (removed in pandas 1.0)
        mo = str(data.loc[k, 'MONTH'])
        yr = str(data.loc[k, 'YEAR'])
        for n in range(1, data.loc[k, 'NO_DAYS'] + 1):
            ts[pd.to_datetime(mo + '/' + str(n) + '/' + yr)] = data.loc[k, 'FLOW' + str(n)]
    # sort_index guarantees chronological order regardless of row order
    ts = pd.Series(ts).sort_index()
    if len(ts.index) > 0:
        # the original would raise IndexError on an unknown station;
        # guard the name so empty results are returned cleanly
        ts.name = s + ': ' + info.loc[s, 'STATION_NAME'] + \
            ' from ' + '{0}'.format(ts.index[0].year) + \
            ' to ' + '{0}'.format(ts.index[-1].year)
    # drop initial and terminal null entries; first/last_valid_index
    # replace the original manual positional scans
    first = ts.first_valid_index()
    if first is None:
        # empty or all-null series
        return ts.iloc[0:0]
    return ts.loc[first:ts.last_valid_index()]
# assemble one dataframe with a column of daily flows per flow-reporting
# station, then cache it for use by other notebooks in this repository
flow_stations = WSC_STATIONS[WSC_STATIONS['Flow'] == True].index
WSC_FLOWS = pd.DataFrame({s: getFlowsWSC(s) for s in flow_stations})
WSC_FLOWS.to_pickle('../data/WSC_FLOWS')
WSC_FLOWS.tail()
05PA001 | 05PA006 | 05PA012 | 05PB001 | 05PB003 | 05PB004 | 05PB009 | 05PB014 | 05PB015 | 05PB018 | 05PB019 | 05PB020 | 05PB021 | 05PB022 | 05PC009 | 05PC010 | 05PC016 | 05PC018 | 05PC019 | 05PC022 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2014-12-27 | NaN | 36.7 | NaN | NaN | NaN | NaN | 36.9 | 22.5 | NaN | 1.27 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 170.0 | 0.027 |
2014-12-28 | NaN | 36.6 | NaN | NaN | NaN | NaN | 37.4 | 22.4 | NaN | 1.26 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 170.0 | 0.026 |
2014-12-29 | NaN | 36.5 | NaN | NaN | NaN | NaN | 37.8 | 22.2 | NaN | 1.25 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 169.0 | 0.026 |
2014-12-30 | NaN | 36.4 | NaN | NaN | NaN | NaN | 37.1 | 22.0 | NaN | 1.24 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 169.0 | 0.026 |
2014-12-31 | NaN | 36.3 | NaN | NaN | NaN | NaN | 38.0 | 21.9 | NaN | 1.24 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 169.0 | 0.026 |