#!/usr/bin/env python
# coding: utf-8

# # Ice Out Dates
#
# Records of 'ice-out' and 'ice-in' days on selected Minnesota lakes are available from the Minnesota DNR at their web site [Ice Out Dates](http://www.dnr.state.mn.us/ice_out/index.html). The customs for determining ice-out and ice-in vary from lake to lake, so it is difficult to compare dates across different lakes. The DNR does, however, attempt to use the same observers and criteria for each lake, so the longitudinal data series for a single lake can provide useful insights.

# ## Initialization
#
# ### Load Python Modules

# In[1]:


# Display graphics inline with the notebook
get_ipython().run_line_magic('matplotlib', 'inline')

# Standard Python modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import datetime

import seaborn as sns
sns.set_context('talk')

# Statistics modules
import statsmodels.api as sm


# In[2]:


# Directories where data files and images are stored
dir = "../data/"
img = "../images/"


# ## Rainy Lake

# In[3]:


# Read .csv file
RL_ICEOUT = pd.read_csv(dir + 'IceOut_RL.txt', index_col=0, parse_dates=True, skiprows=1)

# Remove duplicated data by keeping records from a single source
RL_ICEOUT = RL_ICEOUT[RL_ICEOUT[' source'] == 'MNDNR-SCO']

# Drop unneeded columns
RL_ICEOUT = RL_ICEOUT.drop(columns=[' source', ' comments'])

# Create the dependent variable (ice-out day of year) and an independent
# variable that counts years from the first date in the data set
RL_ICEOUT['DOY'] = RL_ICEOUT.index.dayofyear
RL_ICEOUT['n'] = RL_ICEOUT.index.year - RL_ICEOUT.index.year[0]

RL_ICEOUT.head()


# In[4]:


# Regression of ice-out day of year on years since the start of the record
y = RL_ICEOUT['DOY']
x = RL_ICEOUT['n']

def regress(y, x):
    # Ordinary least squares fit with an intercept term
    model = sm.OLS(y, sm.add_constant(x))
    results = model.fit()
    print(results.params)
    print(results.summary())

    # Plot the data and the fitted trend line
    plt.plot(y.index.year, y)
    plt.plot(y.index.year, results.fittedvalues)
    plt.yticks([92, 102, 112, 122, 132, 142],
               ['Apr 1', 'Apr 10', 'Apr 20', 'May 1', 'May 10', 'May 20'])
    ax = plt.gca()

    # Standard deviation of the residuals
    Sy = np.sqrt(np.sum(results.resid**2)/(len(x) - 2))

    # Plot the prediction interval (i.e., the confidence interval for any particular date)
    from scipy import stats
    t = stats.t.ppf(1 - 0.025, df=len(x) - results.df_model - 1)
    conf = t*Sy*np.sqrt(1 + 1.0/len(x) + (x - x.mean())**2/np.sum((x - x.mean())**2))
    ax.fill_between(y.index.year, results.fittedvalues - conf, results.fittedvalues + conf, alpha=0.2)

    # Plot the confidence interval of the regression line
    conf = t*Sy*np.sqrt(1.0/len(x) + (x - x.mean())**2/np.sum((x - x.mean())**2))
    ax.fill_between(y.index.year, results.fittedvalues - conf, results.fittedvalues + conf, alpha=0.4)

    plt.xlabel('Year')
    plt.ylabel('Day of Year')

regress(y, x)

# Add a title and save the figure
plt.title('Rainy Lake Ice Out')

fname = img + 'IceOut_RL.png'
plt.savefig(fname)
get_ipython().system('convert $fname -trim $fname')

# Get the current axes for use in plotting data from other lakes
rl = plt.gca()


# The relatively long data series for Rainy Lake yields a statistically significant trend line. The ice-out day is consistently getting earlier and, at the same time, becoming more variable. As a result, in recent years Rainy Lake has set records for the earliest ice-out and nearly broken records for the latest ice-out.
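# As an illustrative aside that is not part of the original analysis, the fitted trend can be restated in more familiar units. The sketch below refits the same OLS model so that a results object is available in this cell, then reports the slope as days of change per decade along with a 95% confidence interval from `conf_int()`. It assumes the `RL_ICEOUT` DataFrame built above is still in memory.

# In[ ]:


# Illustrative sketch: refit the Rainy Lake regression and restate the trend in days per decade
res = sm.OLS(RL_ICEOUT['DOY'], sm.add_constant(RL_ICEOUT['n'])).fit()

slope = res.params['n']                      # change in ice-out day per year
lo, hi = res.conf_int(alpha=0.05).loc['n']   # 95% confidence interval on the slope

print("Trend: {:.2f} days per decade (95% CI: {:.2f} to {:.2f})".format(
    10*slope, 10*lo, 10*hi))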
# ## Kabetogama Lake

# In[5]:


# Read .csv file
KL_ICEOUT = pd.read_csv(dir + 'IceOut_KL.txt', index_col=0, parse_dates=True, skiprows=1)

# Remove duplicated data by keeping records from a single source
KL_ICEOUT = KL_ICEOUT[KL_ICEOUT[' source'] == 'MNDNR-SCO']

# Drop unneeded columns
KL_ICEOUT = KL_ICEOUT.drop(columns=[' source', ' comments'])

# Determine the trend line; here n counts observations from the start of the record
KL_ICEOUT['DOY'] = KL_ICEOUT.index.dayofyear
KL_ICEOUT['n'] = pd.Series(np.arange(0, len(KL_ICEOUT)), index=KL_ICEOUT.index)

y = KL_ICEOUT['DOY']
x = KL_ICEOUT['n']

regress(y, x)

plt.title('Kabetogama Lake Ice Out')
plt.ylabel('Day of Year')
plt.xlabel('Year')
plt.axis(rl.axis())

fname = img + 'IceOut_KL.png'
plt.savefig(fname)
get_ipython().system('convert $fname -trim $fname')


# ## Lake of the Woods

# In[6]:


# Read .csv file
LOW_ICEOUT = pd.read_csv(dir + 'IceOut_LOW.txt', index_col=0, parse_dates=True, skiprows=1)

# Remove duplicated data by keeping records from a single source
LOW_ICEOUT = LOW_ICEOUT[LOW_ICEOUT[' source'] == 'MNDNR-SCO']

# Drop unneeded columns
LOW_ICEOUT = LOW_ICEOUT.drop(columns=[' source', ' comments'])

# Determine the trend line; here n counts observations from the start of the record
LOW_ICEOUT['DOY'] = LOW_ICEOUT.index.dayofyear
LOW_ICEOUT['n'] = pd.Series(np.arange(0, len(LOW_ICEOUT)), index=LOW_ICEOUT.index)

y = LOW_ICEOUT['DOY']
x = LOW_ICEOUT['n']

regress(y, x)

plt.title('Lake of the Woods Ice Out')
plt.ylabel('Day of Year')
plt.xlabel('Year')
plt.axis(rl.axis())

fname = img + 'IceOut_LOW.png'
plt.savefig(fname)
get_ipython().system('convert $fname -trim $fname')


# In[ ]:
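# As an illustrative aside that is not part of the original analysis, the three day-of-year series can be overlaid on one set of axes for a quick visual comparison. The sketch below assumes the RL_ICEOUT, KL_ICEOUT, and LOW_ICEOUT DataFrames constructed above are still in memory.

# In[ ]:


# Illustrative sketch: overlay the three ice-out series on a single set of axes
fig, ax = plt.subplots()
for label, df in [('Rainy Lake', RL_ICEOUT),
                  ('Kabetogama Lake', KL_ICEOUT),
                  ('Lake of the Woods', LOW_ICEOUT)]:
    ax.plot(df.index.year, df['DOY'], marker='.', label=label)
ax.set_xlabel('Year')
ax.set_ylabel('Day of Year')
ax.set_title('Ice Out Day of Year by Lake')
ax.legend()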