#!/usr/bin/env python
# coding: utf-8

# # Water Survey of Canada HYDAT
#
# Historical data for lake levels and stream flows in the Rainy River watershed are
# available from several public sources including the
# [HYDAT database](https://www.ec.gc.ca/rhc-wsc/default.asp?lang=En&n=9018B5EC-1)
# from the [Water Survey of Canada (WSC)](http://wateroffice.ec.gc.ca/). The
# [HYDAT database](https://www.ec.gc.ca/rhc-wsc/default.asp?lang=En&n=9018B5EC-1)
# consists of a Microsoft Access .mdb file providing data on daily water flows and
# levels throughout Canada.
#
# For the purposes of these notebooks, individual tables (STATIONS, DLY_FLOWS, and
# DLY_LEVELS) were extracted from the database as .csv files with
# [MDB Explorer](http://www.mdbexplorer.com/). This notebook then reads the .csv
# files to extract information relevant to the Rainy River basin and stores the
# results in this repository's data directory. Due to size constraints, only the
# data relevant to the Rainy River Watershed is included in the repository. Neither
# the HYDAT database nor the extracted .csv files are included in this repository.
#
# This notebook is run whenever a new version of the HYDAT database becomes
# available, normally on a quarterly basis, or if additional flow and level
# stations are needed in the repository data cache.
#
# ### Table of Contents
#
# * [Initialization](#Initialization): Load needed libraries and data tables from
#   the HYDAT database.
# * [Monitoring Stations in the Rainy River Watershed](#Monitoring Stations):
#   Create pandas dataframes for all stations within a geographic box bounded by
#   latitude and longitude. Produces a pickled dataframe WSC_STATIONS in the data
#   directory.
# * [Pickle Level Data to WSC_LEVELS](#WSC_LEVELS): Extract level data from the
#   HYDAT database and pickle to the WSC_LEVELS data set. The pickled dataframe is
#   indexed by date, with columns tagged by station name.
# * [Pickle Flow Data to WSC_FLOWS](#WSC_FLOWS): Extract flow data from the
#   HYDAT database and pickle to the WSC_FLOWS data set. The pickled dataframe
#   is indexed by date, with columns tagged by station name.

# ## Initialization and Loading of HYDAT Data Tables

# In[1]:

# Standard Python modules
import pandas as pd
from IPython.core.display import display

# In[2]:

# Directory where the .csv table extracts from the HYDAT database are stored.
HYDAT_dir = "../data/HYDAT/"

# Read the table extracts into pandas dataframes. STATIONS is indexed by its
# first column (the station number); the daily tables keep the default integer
# index because station numbers repeat across their rows.
STATIONS = pd.read_csv(HYDAT_dir + 'STATIONS.csv', index_col=0)
DLY_LEVELS = pd.read_csv(HYDAT_dir + 'DLY_LEVELS.csv')
DLY_FLOWS = pd.read_csv(HYDAT_dir + 'DLY_FLOWS.csv')

print(" Stations in the HYDAT database = ", len(STATIONS.index))

# ## Monitoring Stations in the Rainy River Watershed
#
# The following cell creates a pandas dataframe of monitoring stations from the
# STATIONS.csv table extracted from the HYDAT database. The extraction searches
# for all stations within a specified region bounded by latitudes and
# longitudes.
#
# For reference, this is a map of the
# [Rainy River drainage](http://www.ijc.org/files/tinymce/uploaded/rl_basinmap.pdf)
# basin available from the International Joint Commission.

# In[3]:

# Bounding region (degrees latitude/longitude) for the Rainy River watershed.
lngW = -94.0
lngE = -90.5
latS = 47.5
latN = 49.0

# Locate monitoring stations within the bounding region.
stationList = STATIONS[(STATIONS['LATITUDE'] >= latS) &
                       (STATIONS['LATITUDE'] <= latN) &
                       (STATIONS['LONGITUDE'] <= lngE) &
                       (STATIONS['LONGITUDE'] >= lngW)].index

# Reduce the data sets to the area of interest. Vectorized .isin() replaces a
# row-by-row membership scan, and .loc replaces the deprecated (and since
# removed) .ix indexer.
WSC_STATIONS = STATIONS.loc[stationList]
DLY_LEVELS = DLY_LEVELS[DLY_LEVELS['STATION_NUMBER'].isin(stationList)]
DLY_FLOWS = DLY_FLOWS[DLY_FLOWS['STATION_NUMBER'].isin(stationList)]

# Add columns to WSC_STATIONS marking which stations report levels and flows.
WSC_STATIONS['Level'] = ''
WSC_STATIONS['Flow'] = ''

# Mark level and flow stations. Index.intersection yields the unique station
# numbers present in both tables (modern pandas rejects a set as an indexer).
levelStations = WSC_STATIONS.index.intersection(DLY_LEVELS['STATION_NUMBER'])
flowStations = WSC_STATIONS.index.intersection(DLY_FLOWS['STATION_NUMBER'])
WSC_STATIONS.loc[levelStations, 'Level'] = True
WSC_STATIONS.loc[flowStations, 'Flow'] = True

print("Stations within region of interest = ", len(WSC_STATIONS.index))
print("DLY_FLOWS records = ", len(DLY_FLOWS.index))
print("DLY_LEVELS records = ", len(DLY_LEVELS.index))

display(WSC_STATIONS.loc[:, ['Level', 'Flow', 'STATION_NAME', 'LATITUDE', 'LONGITUDE']])

WSC_STATIONS.to_pickle('../data/WSC_STATIONS')

# ## Pickle Level Data to WSC_LEVELS

# In[4]:

def getLevelsWSC(s):
    """Return a time series for levels at a given station.

    Decodes the irregular DLY_LEVELS layout — one row per station/year/month
    with day-of-month values in columns LEVEL1..LEVEL31 — into a pandas
    Series indexed by date, with leading and trailing null entries trimmed.
    """
    ts = {}
    data = DLY_LEVELS[DLY_LEVELS['STATION_NUMBER'] == s]
    for k in data.index:
        mo = str(data.loc[k, 'MONTH'])
        yr = str(data.loc[k, 'YEAR'])
        # NO_DAYS is the number of days in this row's month.
        for n in range(1, data.loc[k, 'NO_DAYS'] + 1):
            ts[pd.to_datetime(mo + '/' + str(n) + '/' + yr)] = data.loc[k, 'LEVEL' + str(n)]
    ts = pd.Series(ts)
    # Drop initial and terminal null entries.
    j = 0
    while j < len(ts) and pd.isnull(ts.iloc[j]):
        j += 1
    k = len(ts) - 1
    while k >= j and pd.isnull(ts.iloc[k]):
        k -= 1
    return ts.iloc[j:k + 1]

WSC_LEVELS = pd.DataFrame({s: getLevelsWSC(s)
                           for s in WSC_STATIONS[WSC_STATIONS['Level'] == True].index})
WSC_LEVELS.to_pickle('../data/WSC_LEVELS')
WSC_LEVELS.tail()

# ## Pickle Flow Data to WSC_FLOWS
#
# The DLY_FLOW data is encoded in an
# irregular tabular format where rows are indexed by station code, year, and
# month, and columns are indexed by day of month. Given a station code, the
# following function decodes DLY_FLOW to produce a pandas time series of flow
# rates. The function is used to create a pandas dataframe for all flow
# stations in WSC_STATIONS, then pickles the results to a data file for use
# by other notebooks in this repository.

# In[5]:

def getFlowsWSC(s):
    """Return a time series for flows at a given station.

    Decodes the irregular DLY_FLOWS layout — one row per station/year/month
    with day-of-month values in columns FLOW1..FLOW31 — into a pandas Series
    indexed by date, with leading and trailing null entries trimmed. The
    series name records the station and the span of years with data.
    """
    ts = {}
    data = DLY_FLOWS[DLY_FLOWS['STATION_NUMBER'] == s]
    for k in data.index:
        mo = str(data.loc[k, 'MONTH'])
        yr = str(data.loc[k, 'YEAR'])
        # NO_DAYS is the number of days in this row's month.
        for n in range(1, data.loc[k, 'NO_DAYS'] + 1):
            ts[pd.to_datetime(mo + '/' + str(n) + '/' + yr)] = data.loc[k, 'FLOW' + str(n)]
    ts = pd.Series(ts)
    ts.name = s + ': ' + STATIONS.loc[s, 'STATION_NAME'] + \
        ' from ' + '{0}'.format(ts.index[0].year) + \
        ' to ' + '{0}'.format(ts.index[-1].year)
    # Drop initial and terminal null entries.
    j = 0
    while j < len(ts) and pd.isnull(ts.iloc[j]):
        j += 1
    k = len(ts) - 1
    while k >= j and pd.isnull(ts.iloc[k]):
        k -= 1
    return ts.iloc[j:k + 1]

WSC_FLOWS = pd.DataFrame({s: getFlowsWSC(s)
                          for s in WSC_STATIONS[WSC_STATIONS['Flow'] == True].index})
WSC_FLOWS.to_pickle('../data/WSC_FLOWS')
WSC_FLOWS.tail()