#!/usr/bin/env python
# coding: utf-8
# # Estimating Rainy Lake Inflows 1971-2014
# * Direct Estimate using the Balance Equation
# * Estimating Rainy Lake Inflows with a Kalman Filter
# * Comparing Inflows in 1971-1999 to 2000-2010
# * Correlation of Precipitation with Lake Inflow
# ### Initialization
# Initilization of the graphics system and computational modules used in this IPython notebook.
# In[1]:
# Display graphics inline with the notebook
get_ipython().run_line_magic('matplotlib', 'inline')
# Standard Python modules
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import os
import datetime
# Modules to display images and data tables
from IPython.display import Image
from IPython.core.display import display
# Data Directory
dir = '../data/'
img = '../images/'
# ## Direct Estimate using the Balance Equation
# A crude estimate of inflows can be found starting with the balance equation
#
# $$A_{RL}\frac{dH_{RL}}{dt} = I_{RL}(t) - O_{RL}(t)$$
#
# where the subscript RL refers to Rainy Lake, and $A$, $H$, $I$, and $O$ refer to area, lake level, inflow, and outflow, respectively. Discretizing in time
#
# $$\frac{A_{RL}}{\Delta t}\left(H_{RL}(k+1)-H_{RL}(k)\right) = I_{RL}(k) - O_{RL}(k)$$
#
# Since the sole output flow from Rainy Lake is through Rainy River, we can solve for inflow
#
# $$I_{RL}(k) = \frac{A_{RL}}{\Delta t}\left(H_{RL}(k+1)-H_{RL}(k)\right) + O_{RL}(k)$$
#
# The estimation method is sensitive to small errors in level measurement and therefore provides an estimate of the inflow plagued by excessive noise.
# In[2]:
# Load Rainy River Flow and Rainy Lake Level data
RR = pd.read_pickle(dir + 'RR.pkl')
RL = pd.read_pickle(dir + 'RL.pkl')
# Load regression formulae for lake areas
import pickle
with open(dir + 'area.pkl', 'rb') as handle:
area = pickle.load(handle)
# In[3]:
area
# In[4]:
# Compute time series for Rainy Lake area in sq. meters
areaRL = pd.Series(area['Rainy Lake'](RL), index = RL.index) * 1.0e6
# Estimate Rainy Lake inflow in cubic meters per second
RLinflow = areaRL*(RL.shift(-1) - RL)/(24*3600) + RR
plt.figure(figsize=(10,3.5))
RLinflow.plot(linewidth=0.5)
plt.title('Rainy Lake Inflows {0}-{1}'.format(
RLinflow.index.min().strftime('%Y'),
RLinflow.index.max().strftime('%Y')))
plt.ylabel('cubic meters/sec')
plt.grid()
# ### Constructing an Inflow Data Set for Rainy Lake
#
# Examining the estimated inflows to Rainy Lake, there are evident inconsistencies in the data prior to 1920, and that may extend through to the about 1970. For that reason, only data from 1970 on is retained in a data set to be used for simulating lake levels under alternative control regimes.
# In[5]:
# Plot raw data from 1970 forward
plt.figure(figsize=(10,6))
plt.subplot(3,1,1)
RLinflow['1970':].plot()
plt.title('Rainy Lake Inflow 1970--')
plt.grid()
plt.subplot(3,1,2)
RR['1970':].plot()
plt.title('Rainy Lake Outflow 1970--')
plt.grid()
plt.subplot(3,1,3)
RL['1970':].plot()
plt.title('Rainy Lake Level 1970--')
plt.grid()
plt.tight_layout()
# Plot flow histograms and save image
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(1,1,1)
plt.hold(True)
RLinflow['1970':].hist(bins=range(0, 3000 + 25, 25), \
color = 'blue', alpha = 0.5, log=True, normed=1)
RR['1970':].hist(bins=range(0, 3000 + 25, 25), \
color = 'yellow', alpha = 0.5, log=True, normed=1)
plt.plot([950.0,950.0],[0,0.001],'r',lw=2,alpha=0.6)
plt.hold(False)
plt.xlim(-50,2000)
plt.ylim(0,0.01)
plt.xlabel('Flow [cubic meters/second]')
plt.ylabel('Frequency')
plt.legend(['Outflow at AGO Level', 'Inflow','Outflow'])
plt.title('Frequency vs Flowrates for Rainy Lake, 1970-2014')
plt.savefig(img + 'RLFlowFreq.png')
# Plot level histograms and save image
fig = plt.figure(figsize=(10,4))
ax = fig.add_subplot(1,1,1)
plt.hold(True)
RL['1970':].hist(bins=np.arange(336.3,339.0,0.05), \
color = 'blue', alpha = 0.5, log=True, normed=1)
plt.plot([337.9,337.9],[0.001,1.0],'r',lw=2)
plt.hold(False)
plt.xlim(336.3,339.0)
plt.ylim(0.001,3)
plt.legend(['AGO','Level'])
plt.xlabel('Level [meters]')
plt.ylabel('Frequency')
plt.title('Frequency vs Stage for Rainy Lake, 1970-2014')
plt.savefig(img + 'RLLevelFreq.png')
# In[6]:
yr = '1970'
RLLevelFlow = pd.concat([RL[yr:],RLinflow[yr:],RR[yr:],],axis=1)
RLLevelFlow.columns = ['H','I','O']
RLLevelFlow = RLLevelFlow.interpolate()
RLLevelFlow.to_csv(dir + 'RLLevelFlow.csv')
RLLevelFlow['1971']
# Plotting the estimated inflow for a typical year demonstrates the noisy characteristics of this estimate.
# In[7]:
yr = '2014'
plt.figure(figsize=(9,7))
plt.subplot(2,1,1)
plt.hold(True)
RLinflow[yr].plot(color='b',linewidth=1)
RR[yr].plot(color='r')
plt.hold(False)
ax = plt.axis()
plt.ylim([0,ax[3]])
plt.ylabel('[cubic meters/sec]')
plt.title('Estimated Inflow to Rainy Lake ' + yr)
plt.legend(['Inflow','Outflow'],loc='upper left')
plt.subplot(2,1,2)
RL[yr].plot()
plt.ylabel('Level [meters]')
plt.title('Rainy Lake Level')
plt.tight_layout()
# ## Estimating Rainy Lake Inflows with a Kalman Filter
# For this first model we assume the inflows and outflows to Rainy Lake are exogenous disturbances driven by zero mean white noise. Later we can consider models that incorporate features such as mean seasonal inflows, the dependence of outflow on lake level, and the an additional state variable representing the International Falls Dam. The purpose of this first model is to simply test the idea that a filtering and estimation techniques can provide better knowledge of the ungauged inflows to Rainy Lake.
#
# The model is written
#
# $$\underbrace{\left[\begin{array}{c} H_{RL}(k+1) \\ I_{RL}(k+1) \\ O_{RL}(k+1) \end{array}\right]}_{x(k+1)} = \underbrace{\left[ \begin{array}{ccc} 1 & \frac{\Delta t}{A_{RL}} & -\frac{\Delta t}{A_{RL}} \\0 & 1 & 0\\0 & 0 & 1\end{array}\right]}_{A} \underbrace{\left[\begin{array}{c} H_{RL}(k) \\ I_{RL}(k) \\ O_{RL}(k) \end{array}\right]}_{x(k)} + \underbrace{\left[ \begin{array}{c} 0 \\ w_{I}(k) \\ w_{O}(k)\end{array}\right]}_{w(k)}$$
#
# where $H$, $I$, $O$ are lake level, inflow, and outflow, respectively. The signals $w_I(k)$, and $w_O(k)$ are zero-mean independent and identically distributed random increments to changing lake inflow and outflows. The vector $w(k)$ is assumed to be distributed as multivariate Normal distribution with zero mean and a covariance $Q$.
#
# The measurement model is written
#
# $$\underbrace{\left[\begin{array}{c} y_H(k) \\ y_O(k) \end{array}\right]}_{y(k)} = \underbrace{\left[\begin{array}{ccc} 1 & 0 & 0 \\ 0 & 0 & 1 \end{array}\right]}_{C} \underbrace{\left[\begin{array}{c} H_{RL}(k) \\ I_{RL}(k) \\ O_{RL}(k) \end{array}\right]}_{x(k)} + \underbrace{\left[\begin{array}{c} v_{H}(k) \\ v_{O}(k) \end{array}\right]}_{v(k)} $$
#
# where the measurement noise vector $v(k)$ is an i.i.d. random variate with zero mean and a covariance $R$.
#
# A Kalman filter provides a two step method for estimate values of the state vector $x(k)$. The the estimate of $x(k)$ using all information available up to time $k-1$ is the prediction step given by
#
# $$\hat{x}(k|k-1) = A \hat{x}(k-1|k-1)$$
#
# with covariance
#
# $$\hat{P}(k|k-1) = A \hat{P}(k-1|k-1) A^T + Q$$
#
# In the measurement update step, an innovation is the difference between the actual measurement at time $k$ versus the predicted measurement
#
# $$e(k) = y(k) - C \hat{x}(k|k-1)$$
#
# with covariance
#
# $$S(k) = C\hat{P}(k|k-1)C^T + R$$
#
# The Kalman filter gain is given by
#
# $$K(k) = \hat{P}(k|k-1)C^TS^{-1}(k)$$
#
# which is used to compute the updated state estimate
#
# $$\hat{x}(k|k) = \hat{x}(x|k-1) + K(k)e(k)$$
#
# and
#
# $$\hat{P}(k|k) = \hat{P}(k|k-1) - K(k)S(k)K^T(k)$$
#
# This is implemented in the following cells for the purpose of estimating inflow to Rainy Lake from historical data of lake levels and outflows.
# In[8]:
areaRL = float(97500*10000)
dt = float(24*3600)
A = np.array([[1.0, dt/areaRL, -dt/areaRL],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]])
C = np.array([[1.0, 0.0, 0.0],
[0.0, 0.0, 1.0]])
# Covariance of state inputs
Q = np.array([[0.0, 0.0, 0.0],
[0.0, 10000.0, 0.0],
[0.0, 0.0, 10000.0]])
# Measurement covariance
R = np.array([[0.0001, 0],[0.0, 1000.0]])
# In[9]:
# Load Rainy Lake Level and Rainy River Flow data
yrA = '1971'
yrB = '2010'
y = pd.concat([pd.read_pickle(dir+'RL.pkl')[yrA:yrB],
pd.read_pickle(dir+'RR.pkl')[yrA:yrB]],axis=1)
y.columns = ['H','O']
y = y.interpolate()
# In[10]:
# DataFrame to store results
x = pd.DataFrame(index = y.index,columns=('H','I','O'))
e = pd.DataFrame(index = y.index,columns=('H','O'))
# Initial values for x and P
x.ix[0] = np.array([y.ix[0]['H'], y.ix[0]['O'], y.ix[0]['O']])
e.ix[0] = np.array([0,0])
P = np.array([[0.001, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]])
# Kalman Filter
for k in range(1,len(y.index)):
# Prediction
xpred = np.dot(A,x.ix[k-1])
Ppred = np.dot(np.dot(A,P),np.transpose(A)) + Q
# Measurement
ymeas = np.array([y.ix[k]['H'],y.ix[k]['O']])
# Update
e.ix[k] = ymeas - np.dot(C,xpred)
S = np.dot(np.dot(C,Ppred),np.transpose(C)) + R
K = np.dot(np.dot(Ppred,np.transpose(C)),np.linalg.inv(S))
x.ix[k] = xpred + np.dot(K,e.ix[k])
P = Ppred - np.dot(np.dot(K,S),np.transpose(K))
# Pickle Rainy Lake Inflows for use in other notebooks
x['I'].to_pickle('../data/RLInflows.pkl')
x['H'].to_pickle('../data/RLLevels.pkl')
x['O'].to_pickle('../data/RLOutflows.pkl')
x.to_csv('../data/RLEstimates.csv')
# In[11]:
plt.figure(figsize=(9,7))
plt.subplot(2,1,1)
plt.hold(True)
x['I'].plot(linewidth=0.5)
x['O'].plot(linewidth=0.5)
plt.hold(False)
plt.title('Estimated Inflow and Outflows ' + yrA + '-' + yrB)
plt.legend(['Estimated Inflow (I)','Estimated Outflow (O)'])
plt.subplot(2,1,2)
plt.hold(True)
x['H'].plot(linewidth=0.5)
RL[yrA:yrB].plot(linewidth=0.5)
plt.hold(False)
plt.title('Estimated Lake Level ' + yrA + '-' + yrB)
plt.legend(['Estimated Level (H)','Measured Level'])
plt.tight_layout()
plt.figure(figsize=(9,7))
plt.subplot(2,1,1)
e['H'].plot(linewidth=0.5)
plt.title('Level Estimation Error ' + yrA + '-' + yrB)
plt.subplot(2,1,2)
e['O'].plot(linewidth=0.5)
plt.title('Outflow Estimation Error ' + yrA + '-' + yrB)
plt.tight_layout()
# ## Comparing Inflows in 1971-1999 to 2000-2010
# In[12]:
# Shift to a common year
I = pd.DataFrame([])
for yr,r in x['I'].groupby(x.index.year):
shift = datetime.datetime(2014,1,1) - datetime.datetime(yr,1,1)
r = r.tshift(shift.days)
r.name = yr
I = pd.concat([I,r],axis=1)
# Plot
plt.figure(figsize=(9,6))
plt.subplot(2,1,1)
idx = range(1971,2000)
I[idx].mean(axis=1).plot(color='r',linewidth=4)
plt.hold(True)
I[idx].plot(ax=plt.gca(),legend=False)
I[idx].mean(axis=1).plot(color='r',linewidth=4)
plt.ylim(0,1800)
plt.title('Rainy Lake Inflow 1971-1999')
plt.hold(False)
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))
# Plot
plt.subplot(2,1,2)
idx = range(2000,2011)
I[idx].mean(axis=1).plot(color='r',linewidth=4)
plt.hold(True)
I[idx].plot(ax=plt.gca(),legend=False)
I[idx].mean(axis=1).plot(color='r',linewidth=4)
plt.ylim(0,1800)
plt.title('Rainy Lake Inflow 2000-2010')
plt.hold(False)
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))
plt.tight_layout()
# In[14]:
plt.figure(figsize=(12,6))
plt.hold(True)
q_hi = 80
q_lo = 20
idx = range(1971,2000)
plt.plot(I[idx].index,
np.percentile(I[idx],q=q_hi,axis=1),
color='y',alpha=0.6)
a, = plt.plot(I[idx].index,
np.percentile(I[idx],q=q_lo,axis=1),
color='y',alpha=0.6,label='1971-1999')
plt.fill_between(I[idx].index,
np.percentile(I[idx],q=q_hi,axis=1).tolist(),
np.percentile(I[idx],q=q_lo,axis=1).tolist(),
color='y',alpha=0.6)
idx = range(2000,2011)
plt.plot(I[idx].index,
np.percentile(I[idx],q=q_hi,axis=1),
color='b',alpha=0.4)
b, = plt.plot(I[idx].index,
np.percentile(I[idx],q=q_lo,axis=1),
color='b',alpha=0.4,label='2000-2010')
plt.fill_between(I[idx].index,
np.percentile(I[idx],q=q_hi,axis=1).tolist(),
np.percentile(I[idx],q=q_lo,axis=1).tolist(),
color='b',alpha=0.4)
plt.hold(False)
plt.title('{:d}th to {:d}th Percentiles of Rainy Lake Inflows'.format(q_lo,q_hi))
plt.ylabel('[cubic meters/sec]')
plt.legend(handles=[a,b])
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))
fname = '../images/RainyLakeInflows.png'
plt.savefig(fname)
get_ipython().system('convert $fname -trim $fname')
get_ipython().system('convert $fname -transparent white $fname')
# In[18]:
get_ipython().run_cell_magic('capture', 'capt', '%run -i Predicted_Effect_of_Rule_Curve_Changes_on_Rainy_River_Flows.ipynb\n')
# In[19]:
areaNL = 25973 * 10000
dNL1970 = 0.5*(NL1970['LRC']+NL1970['URC']).diff()
dNL2000 = 0.5*(NL2000['LRC']+NL2000['URC']).diff()
qNL1970 = -areaNL*dNL1970/(24*3600)
qNL2000 = -areaNL*dNL2000/(24*3600)
dqNL = qNL2000 - qNL1970
plt.figure(figsize=(12,6))
plt.hold(True)
(I[range(2000,2011)].mean(axis=1)-I[range(1971,2000)].mean(axis=1)).plot()
plt.fill_between(dqNL.index,0, dqNL.tolist(), color='b', alpha='0.4')
plt.hold(False)
plt.title('Change in Mean Rainy Lake Inflow, 1971-1999 versus 2000-2011')
plt.ylabel('cubic meters/sec')
plt.gca().set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun',
'Jul','Aug','Sep','Oct','Nov','Dec']);
fname = '../images/ChangeInMeanInflow.png'
plt.savefig(fname)
get_ipython().system('convert $fname -trim $fname')
get_ipython().system('convert $fname -transparent white $fname')
# ## Emergency Conditions
# In[ ]:
RLInflows = pd.read_pickle('./data/RLInflows.pkl')
df = pd.concat([RLInflows,RL['1971':'2010']],axis=1)
df.columns=['I','H']
df['I'].plot(linewidth=0.5)
plt.hold(True)
df['H'].plot(linewidth=2)
plt.hold(False)
plt.ylim(0,2400)
# In[ ]:
plt.scatter(df.ix['1971':'1999']['I'],df.ix['1971':'1999']['H'],color='b',alpha=0.5)
plt.hold(True)
plt.scatter(df.ix['2000':'2010']['I'],df.ix['2000':'2010']['H'],color='r',alpha=0.5)
ax = plt.axis()
plt.plot([ax[0],ax[1]],[337.90,337.90],'y--')
plt.plot([ax[0],ax[1]],[337.75,337.75],'y--')
plt.plot([ax[0],ax[1]],[337.20,337.20],'y--')
plt.plot([ax[0],ax[1]],[336.70,336.70],'y--')
plt.xlim(ax[0],ax[1])
plt.hold(False)
# ## Modeling of Rainy Lake Inflows
# ### Load Inflow Data
# In[ ]:
# Load inflow data
RLInflows = pd.read_pickle('./data/RLInflows.pkl')
def plotInflowData(Q):
plt.figure(figsize=(10,3.5))
Q.plot(linewidth=0.75)
plt.title('Rainy Lake Inflows {0}-{1}'.format(
Q.index[0].strftime('%Y'),
Q.index[-1].strftime('%Y')))
plt.ylabel('cubic meters/sec');
plotInflowData(RLInflows)
plotInflowData(RLInflows['1980':'1985'])
# ### Descriptive Statistics
# In[ ]:
RLInflows.hist(bins=np.arange(-100,2500,20),alpha=0.5)
# In[ ]:
from pandas.tools.plotting import lag_plot
lag_plot(RLInflows,lag=1)
plt.axis('equal')
plt.hold(True)
plt.plot([RLInflows.min(),RLInflows.max()],[RLInflows.min(),RLInflows.max()],'r--')
plt.hold(False)
plt.title('Lag Plot of Daily Rainy Lake Inflows {0}-{1}'.format(
RLInflows.index.min().strftime('%Y'),
RLInflows.index.max().strftime('%Y')))
plt.xlabel('Inflow(t) cubic meters/sec')
plt.ylabel('Inflow(t+1) cubic meters/sec');
# ### Fitting a Time Series Model
# The augmented Dickey-Fuller test is used to test for a unit root in a univariate process.
# In[17]:
import statsmodels.api as sm
(adf, pvalue, usedlag, nobs, cvalues, icbtest) = sm.tsa.adfuller(RLInflows)
print adf
print pvalue
# The test statistic is strongly negative, and we can soundly reject a unit root and therefore a the non-stationary hypothesis. This was also abundantly clear from the original data.
#
# Next we consider autocorrelations
# In[18]:
import statsmodels.graphics.tsaplots as tsaplots
plt.hold(True)
plt.close()
tsaplots.plot_acf(RLInflows,lags = 120);
plt.hold(False)
plt.figure()
plt.hold(True)
plt.close()
tsaplots.plot_pacf(RLInflows,lags = 20);
plt.hold(False)
# In[19]:
sm.tsa.arma_order_select_ic(RLInflows.tolist(), ic=['aic', 'bic'], trend='nc')
# In[20]:
arma_mod = sm.tsa.ARMA(RLInflows.tolist(), (4,2)).fit()
print "Coeff Value Std. Err. pvalue"
K = arma_mod.k_trend
k = 0
j = 0
while k < K:
print 'tr[{0}] = {1:9.4f} {2:7.4f} {3:7.4f}'.format(
j, arma_mod.params[k], arma_mod.bse[k], arma_mod.pvalues[k])
j += 1
k += 1
K += arma_mod.k_exog
j = 0
while k < K:
print 'ex[{0}] = {1:9.4f} {2:7.4f} {3:7.4f}'.format(
j, arma_mod.params[k], arma_mod.bse[k], arma_mod.pvalues[k])
j += 1
k += 1
print ''
K += arma_mod.k_ar
j = 0
while k < K:
print 'ar[{0}] = {1:9.4f} {2:7.4f} {3:7.4f}'.format(
j, arma_mod.params[k], arma_mod.bse[k], arma_mod.pvalues[k])
j += 1
k += 1
print ''
K += arma_mod.k_ma
j = 1
while k < K:
print 'ma[{0}] = {1:9.4f} {2:7.4f} {3:7.4f}'.format(
j, arma_mod.params[k], arma_mod.bse[k], arma_mod.pvalues[k])
j += 1
k += 1
print ''
# In[21]:
RLPred = pd.Series(arma_mod.predict(1,len(RLInflows),dynamic=False),
index=RLInflows.index)
yrA = '1971'
yrB = '1975'
RLInflows[yrA:yrB].plot(linewidth=0.5)
plt.hold(True)
RLPred[yrA:yrB].plot(linewidth=0.5)
(RLPred[yrA:yrB]-RLInflows[yrA:yrB]).plot(linewidth=0.5)
plt.ylabel('cubic meters/sec')
plt.hold(False)
# ## Analysis of Residuals
# In[22]:
r = pd.Series(arma_mod.resid,index=RLInflows.index).copy()
plt.figure(figsize=(10,3.5))
r.ix[0:100].plot(linewidth=1)
import scipy.signal
a = np.concatenate((np.array([1]),-arma_mod.arparams))
b = np.concatenate((np.array([1]),arma_mod.maparams))
f = pd.Series(scipy.signal.lfilter(a,b,r) ,index = RLInflows.index)
f.ix[0:100].plot(linewidth=1)
plt.hold(True)
# In[23]:
r.mean()
# In[24]:
sm.stats.durbin_watson(r)
# In[25]:
from scipy import stats
stats.normaltest(r)
# The very low value returned from `scipy.stats.normaltest()` tells us that the residuals are not Normally distributed.
# In[26]:
rmean = pd.Series([d.mean() for t,d in f.groupby(f.index.dayofyear)])
rmean_smooth = pd.rolling_window(
pd.concat([rmean.tshift(-len(rmean),freq=1),rmean,rmean.tshift(len(rmean),freq=1)]),
30,'triang',center=True)[rmean.index]
rstd = pd.Series([d.std() for t,d in f.groupby(f.index.dayofyear)])
rstd_smooth = pd.rolling_window(
pd.concat([rstd.tshift(-len(rstd),freq=1),rstd,rstd.tshift(len(rstd),freq=1)]),
30,'triang',center=True)[rstd.index]
plt.subplot(2,1,1)
plt.hold(True)
rmean.plot(linewidth=1)
rmean_smooth.plot(style='r')
plt.xlabel('Day of Year')
plt.ylabel('cubic meters/sec')
plt.title('Residual Mean')
plt.hold(False)
plt.subplot(2,1,2)
plt.hold(True)
rstd.plot(linewidth=1)
rstd_smooth.plot(style='r')
plt.xlabel('Day of Year')
plt.ylabel('cubic meters/sec')
plt.title('Residual Standard Deviation')
plt.hold(False)
plt.tight_layout()
# In[27]:
s = r.copy()
for k in s.index:
s.ix[k] = (s.ix[k] - rmean_smooth.ix[k.dayofyear-1])
s.ix[k] = s.ix[k]/rstd_smooth.ix[k.dayofyear-1]
plt.figure(figsize=(12,3.5))
s.plot(linewidth=0.5);
plt.title('Residuals')
plt.ylabel('cubic meters/sec')
print stats.normaltest(s)
from scipy import stats
df,loc,scale = stats.t.fit(s)
dist = stats.t(df,loc=loc,scale=scale)
plt.figure(figsize=(10.2,3.5))
plt.subplot(1,3,1)
s.hist(bins=200,normed=True)
plt.hold(True)
xmin,xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, dist.pdf(x), 'r', linewidth=2)
plt.xlim(-6,6)
plt.title('Probability Density Function')
plt.xlabel('Residual')
plt.ylabel('Probability')
plt.legend(['t-distribution','Data'],loc='upper left')
plt.hold(False)
print df,loc,scale
plt.subplot(1,3,2)
y = [v for v in sorted(s.tolist())]
x = [dist.ppf(float(k)/float(1+len(s))) for k in range(1,len(s)+1)]
plt.hold(True)
plt.scatter(x,y)
plt.plot([min(x),max(x)],[min(x),max(x)],'r')
plt.axis('equal')
plt.xlabel('Fitted Response')
plt.ylabel('Ordered Response')
plt.title('Probability Plot')
plt.hold(False)
plt.subplot(1,3,3)
lag_plot(s)
plt.axis('equal')
plt.title('Lag Plot')
plt.xlabel('s(k)')
plt.ylabel('s(k+1)')
plt.tight_layout()
# Removing the seasonality reduced the chi-squared statistic, but it is still clearly a non-Normal distribution.
# ## Simulation
# In[28]:
#e = pd.Series(dist.rvs(len(s)), index = s.index)
e = pd.Series(0, index = s.index)
plt.figure(figsize=(12,7))
for k in e.index:
e.ix[k] = random.choice(s)
plt.subplot(2,1,1)
e.plot(linewidth=0.5)
for k in e.index:
e.ix[k] = e.ix[k]*rstd_smooth.ix[k.dayofyear-1]
#e.ix[k] = random.choice(s)*rstd_smooth.ix[k.dayofyear-1]
e.ix[k] += rmean_smooth.ix[k.dayofyear-1]
plt.subplot(2,1,2)
e.plot(linewidth=0.5)
import scipy.signal
a = np.concatenate((np.array([1]),-arma_mod.arparams))
b = np.concatenate((np.array([1]),arma_mod.maparams))
f = pd.Series(scipy.signal.lfilter(b,a,e) + arma_mod.params[0],index = e.index)
plotInflowData(f)
f['1971':'1975'].plot(linewidth=0.5)
plt.hold(True)
#RLInflows['1981':'1981'].plot()
#(f-RLInflows['1981':'1981']).plot()
#e.plot(linewidth=0.5)
plt.hold(False)
print f.mean()
# In[ ]:
# In[ ]:
plotInflowData(f)
# In[ ]:
#N = 5001
#e = pd.Series(dist.rvs(N), index = pd.date_range(pd.datetime.today().date(), periods=N))
#e = s['1981':'1981'].copy()
#for k in e.index:
# e.ix[k] = e.ix[k]*rstd_smooth.ix[k.dayofyear-1]
# e.ix[k] += rmean_smooth.ix[k.dayofyear-1]
import scipy.signal
a = np.concatenate((np.array([1]),-arma_mod.arparams))
b = np.concatenate((np.array([1]),arma_mod.maparams))
f = pd.Series(scipy.signal.lfilter(b,a,e) + arma_mod.params[0],index = e.index)
f['1971':'1975'].plot(linewidth=0.5)
plt.hold(True)
#RLInflows['1981':'1981'].plot()
#(f-RLInflows['1981':'1981']).plot()
#e.plot(linewidth=0.5)
plt.hold(False)
print f.mean()
# In[ ]:
import scipy.signal
a = np.concatenate((np.array([1]),-arma_mod.arparams))
b = np.concatenate((np.array([1]),arma_mod.maparams))
y = RLInflows.tolist()
e = scipy.signal.lfilter(a,b,y)
e = pd.Series(e,index=RLInflows.index)
e['1986'].plot(linewidth=0.5)
plt.hold(True)
tsaplots.plot_acf(e,lags = 100);
plt.hold(False)
e = e - e.mean()
ep = e.ix[e >= 0]
en = -e.ix[e <= 0]
en.plot()
en.hist(bins=range(0,300),normed=True,color='g',alpha=0.5)
plt.hold(True)
ep.hist(bins=range(0,300),color='r',alpha=0.5,normed=True)
plt.hold(False)
plt.xlim(0,200)
# In[ ]:
from pandas.tools.plotting import lag_plot
lag_plot(RLInflows['1971':'1999'].diff(),color='b',alpha=0.4)
plt.hold(True)
lag_plot(RLInflows['2000':'2010'].diff(),color='r',alpha=0.4)
plt.hold(False)
plt.axis('equal')
# In[ ]:
RLInflows['1971':'1999'].diff().hist(bins=np.arange(-400,1200,20),color='b',alpha=0.5,normed=True)
plt.hold(True)
RLInflows['2000':'2011'].diff().hist(bins=np.arange(-400,1200,20),color='r',alpha=0.5,normed=True)
plt.hold(False)
# ## Correlation of Precipitation with Lake Inflow
# In[ ]:
x['I'].plot()
# In[ ]:
KINL = pd.read_pickle(dir+'KINL.pkl')
plt.hold(True)
(10*KINL[yrA:yrB]).plot()
x['I'].plot()
plt.hold(False)
# In[ ]: