| ') #split the table entries
if len(s) == 1: continue
if s[0].find('The official station') > -1: continue
if s[0].startswith('')
ids = re.findall("id='(.*?)'",header)[1:]
titles = re.findall("title=' (.*?)\.?'",header)
if s[0].startswith(' | '): s[0]=s[0][8:]
if s[-1].endswith(' | '): s[-1]=s[-1][:-10]
#skip entire year if missing data
if '' in s:
skipped.append(s[0])
continue
data += [ [int(s[0])] + map(float,s[1:]) ]
print file,'skipping:',' '.join(skipped)
# print ', '.join([str(i)+':'+ids[i] for i in range(len(ids))])
return data
# Sample load of a single month's table.
# NOTE(review): judging from the visible parsing code, each entry appended to
# the returned list is one year's record, [year, value, value, ...]; the
# column layout [[1900 .. 2012],[11.0,6.0, ...], ... ] is what the later
# transposition into yd[month] produces -- confirm against the full getdata().
# Source page:
##http://weather-warehouse.com/WeatherHistory/PastWeatherData_IthacaCornellUniv_Ithaca_NY_March.html
data=getdata('wdata/March.html')
from scipy import stats
#r,p=stats.pearsonr(xdata,ydata)
#slope, intercept, r_value, p_value, std_err = stats.linregress(xdata,ydata)
# One HTML table per month lives in the wdata/ subdirectory,
# e.g. wdata/March.html.
# yd maps a month name to a list of 12 columns: column j gathers field j of
# every yearly record returned by getdata (field 0 is the year).
yd = {}
for m in months:
    data = getdata('wdata/' + m + '.html')
    columns = []
    for j in range(12):
        columns.append([record[j] for record in data])
    yd[m] = columns
# Regress every temperature column (1..7) on the year column, month by month.
# Only the r value of each fit is stored: cor[month][j - 1] is the Pearson r
# of column j against the year.
cor = {}
for m in months:
    years = yd[m][0]
    # element 2 of the linregress result is r_value
    cor[m] = [stats.linregress(years, yd[m][j])[2] for j in range(1, 8)]

# Strongest positive trends as (month, column index, r) for r > .15.
# As a bare expression this only shows a value in an interactive session.
[(m, j, r) for m in cor for j, r in enumerate(cor[m], 1) if r > .15]
# Column indices inside yd[month]:
# 0:year, 1:lotemp, 2:hitemp, 3:himin, 4:lomax, 5:avgmin, 6:avgmax, 7:mean
# February "lomax" (column 4) versus year, with its least-squares line.
xdata = yd['February'][0]
ydata = yd['February'][4]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
# A formatted print() call emits the same text as the Python 2 statement
# `print a, b, c` and is also valid syntax on Python 3.
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept  # fitted value for every year
plot(xdata, ydata, 'go', xdata, line, 'r-')  # green dots: data, red line: fit
xlim(1900, 2013)
grid('on')
figure(figsize=(6, 9))
# Compare August (half a year away) with February, both using column 4
# (lomax), each drawn as data points plus its least-squares line.
xdata = yd['August'][0]
ydata = yd['August'][4]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
# Formatted print() call: same output as the Python 2 print statement, and
# also valid syntax on Python 3.
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'm-')

xdata = yd['February'][0]
ydata = yd['February'][4]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'r-')

# 19 evenly spaced ticks from -10 to 80, i.e. one every 5.
yticks(linspace(-10, 80, 19, endpoint=True))
xlim(1900, 2013)
# The labels follow the order in which the four lines were drawn
# (Aug points, Aug fit, Feb points, Feb fit). The r/p figures are fixed text,
# not taken from the values computed above.
# loc is passed by keyword: with two positional arguments current matplotlib
# reads them as (handles, labels) rather than (labels, loc).
legend(['Aug low max', 'r=.04, p=.74',
        'Feb low max', 'r=.23, p=.017'],
       loc='center left')
grid('on')
# Regress the precipitation/snow columns (8..11) on the year column, month by
# month. Only the r value of each fit is stored: cor[month][j - 8] is the
# Pearson r of column j against the year.
cor = {}
for m in months:
    years = yd[m][0]
    # element 2 of the linregress result is r_value
    cor[m] = [stats.linregress(years, yd[m][j])[2] for j in range(8, 12)]

# Large positive correlations as (month, column index, r) for r > .2.
# As a bare expression this only shows a value in an interactive session.
[(m, j, r) for m in cor for j, r in enumerate(cor[m], 8) if r > .2]
# Column indices inside yd[month] (continued):
# 8:precip, 9:snow, 10:maxprecip, 11:maxsnow
# September precip (column 8) and maxprecip (column 10) versus year, each
# drawn as data points plus its least-squares line.
xdata = yd['September'][0]
ydata = yd['September'][8]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
# Formatted print() call: same output as the Python 2 print statement, and
# also valid syntax on Python 3.
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'm-')

ydata = yd['September'][10]  # same years, second series
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'r-')

grid('on')
xlim(1900, 2013)
# Labels follow the draw order (points, fit, points, fit); the correl/p
# figures are fixed text, not taken from the values computed above.
# loc is passed by keyword: with two positional arguments current matplotlib
# reads them as (handles, labels) rather than (labels, loc).
legend(['Sep precip', 'correl=.29, p=.007', 'Sep maxprecip', 'correl=.31, p=.004'],
       loc='upper left')
# Large anti-correlations as (month, column index, r) for r < -.1; cor holds
# the r values of columns 8..11, hence the numbering from 8.
# As a bare expression this only shows a value in an interactive session.
[(m, j, r) for m in cor for j, r in enumerate(cor[m], 8) if r < -.1]

# April snow (column 9) and maxsnow (column 11) versus year, each drawn as
# data points plus its least-squares line.
xdata = yd['April'][0]
ydata = yd['April'][9]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
# Formatted print() call: same output as the Python 2 print statement, and
# also valid syntax on Python 3.
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'm-')

ydata = yd['April'][11]  # same years, second series
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'r-')

grid('on')
xlim(1900, 2013)
# Labels follow the draw order (points, fit, points, fit); the correl/p
# figures are fixed text, not taken from the values computed above.
legend(['April snow', 'correl=-.13, p=.2', 'April maxsnow', 'correl=-.11, p=.26'])
# Gather the monthly "mean" column (index 7) of every month under its year,
# then regress the annual average on the year.
# Years 1919-1925 are deliberately absent from the table, exactly as before;
# a record dated in that gap or outside 1900-2012 raises KeyError.
yrdata = {yr: [] for yr in range(1900, 2013) if not 1919 <= yr <= 1925}
for m in months:
    data = yd[m]
    yrs = data[0]
    avg = data[7]
    for yr, value in zip(yrs, avg):
        yrdata[yr].append(value)

# Keep only years for which all 12 months contributed a value. The year list
# is built once and sorted so xdata/ydata are paired by construction and in
# chronological order, independent of dict iteration order.
xdata = sorted(yr for yr in yrdata if len(yrdata[yr]) == 12)
ydata = [mean(yrdata[yr]) for yr in xdata]
slope, intercept, r_value, p_value, std_err = stats.linregress(xdata, ydata)
# Formatted print() call: same output as the Python 2 print statement, and
# also valid syntax on Python 3.
print('%s %s %s' % (r_value, p_value, std_err))
line = slope * array(xdata) + intercept
plot(xdata, ydata, 'o', xdata, line, 'r-')
xlim(1926, 2012)
grid('on')

# Spot checks of two years' monthly values; as bare expressions these only
# show a value in an interactive session.
yrdata[2011]
yrdata[1990]
|
---|