"""Compare two ways of building yearly z-scores from the CPC `sstoi.indices` table.

The monthly table is read once with pandas and once with NumPy; the yearly
mean of the last anomaly column is standardised with both approaches and the
difference between the two results is printed.  Finally the notebook
`test.ipynb` is published with the external `jist` tool and its nbviewer
link is printed.
"""
import subprocess
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import scipy.stats as sts  # kept for any code outside this view; no longer used here

url = 'http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices'


def parse(yr, mon):
    """Return a datetime for day 2 of the given year and month.

    Both arguments are passed through `int`, so numeric strings are accepted.
    """
    date = datetime(year=int(yr), day=2, month=int(mon))
    return date


def zscore(x):
    """Standardise `x` with its own mean and (pandas default, ddof=1) std."""
    return (x - x.mean()) / x.std()


def load_monthly_frame(source=url):
    """Read the whitespace-separated monthly table into a date-indexed frame.

    `source` is anything `pandas.read_table` accepts (URL, path, file object).
    The `YR` and `MON` columns are combined through `parse` into an index
    named `YR_MON` and then dropped, which is the layout the former
    `parse_dates=[['YR', 'MON']]` / `date_parser` arguments produced; those
    arguments are deprecated in current pandas, so the index is built here.
    """
    raw = pd.read_table(source, sep=' ', header=0, skipinitialspace=True)
    stamps = [parse(yr, mon) for yr, mon in zip(raw['YR'], raw['MON'])]
    data = raw.drop(['YR', 'MON'], axis=1)
    data.index = pd.DatetimeIndex(stamps, name='YR_MON')
    return data


def annual_nino34_zscores(npdata, year_col=0, value_col=9, months_per_year=12):
    """Return `(years, zscores)` of the yearly mean of one column.

    `npdata` is a 2-D array with one row per month.  Years that do not have
    exactly `months_per_year` rows get NaN.  The default `value_col=9` is the
    column the script refers to as nin34 (presumably the Nino 3.4 anomaly --
    confirm against the file header).

    Years come from `np.unique`, so they are sorted ascending; iterating a
    `set` gave an arbitrary order that could misalign the result with the
    year-sorted pandas groups it is compared against.
    """
    years = np.unique(npdata[:, year_col]).astype(int)
    annual = np.full(len(years), np.nan)
    for ind, year in enumerate(years):
        monthly = npdata[npdata[:, year_col] == year, value_col]
        if len(monthly) == months_per_year:
            annual[ind] = monthly.mean()
    # scipy.stats.nanmean/nanstd were removed from SciPy; the old nanstd was
    # unbiased by default, hence ddof=1 here.
    zscores = (annual - np.nanmean(annual)) / np.nanstd(annual, ddof=1)
    return years, zscores


def nbviewer_url(gist_url):
    """Rewrite a gist.github.com URL into the matching nbviewer URL."""
    return gist_url.replace('https://gist.github.com/',
                            'http://nbviewer.ipython.org/')


def publish_notebook(notebook='test.ipynb'):
    """Publish `notebook` with `jist -p` and return its nbviewer URL.

    Replaces the IPython-only `gist = !jist -p test.ipynb` line, which is a
    syntax error in plain Python.  Returns None (after reporting on stderr)
    when `jist` cannot be run or prints nothing.
    """
    try:
        output = subprocess.check_output(['jist', '-p', notebook],
                                         universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as err:
        sys.stderr.write('could not publish %s: %s\n' % (notebook, err))
        return None
    lines = output.splitlines()
    if not lines:
        sys.stderr.write('jist produced no output for %s\n' % notebook)
        return None
    return nbviewer_url(lines[0])


def main():
    """Run the pandas/NumPy comparison and publish the notebook."""
    data = load_monthly_frame()
    print(data.describe())  # the original printed the bound method itself
    print('\n')
    print(data.index)

    by_year = lambda stamp: stamp.year
    grouped = data.groupby(by_year)
    transformed = grouped.transform(zscore)
    print(transformed['ANOM.3'])

    # Same computation on the raw NumPy table.
    npdata = np.genfromtxt(url, skip_header=1)
    years, nin34x = annual_nino34_zscores(npdata)
    nin34y = np.vstack((years, nin34x))
    print(nin34y)

    # NOTE(review): dropping the last group assumes the final year in the file
    # is incomplete -- confirm this still holds when the table ends in December.
    annual = grouped.mean()['ANOM.3'].iloc[:-1]
    attempt_cheating = zscore(annual)
    print(attempt_cheating - nin34x[:-1])

    # Keep only years with at least 12 monthly rows.
    grouped2 = (data.groupby(by_year)
                .filter(lambda sdf: len(sdf) > 11)
                .reset_index(drop=True))
    print(grouped2)
    print('\n')
    print(grouped2.index)

    print(attempt_cheating)

    nbviewer = publish_notebook('test.ipynb')
    if nbviewer is not None:
        print(nbviewer)


if __name__ == '__main__':
    main()