%load_ext load_style
%load_style talk.css
import pandas as pd
import numpy as np
from numpy import ma
from matplotlib import pyplot as plt
from mpl_toolkits.basemap import Basemap
%matplotlib inline
The file (74 Mb) can be downloaded at ftp://ftp.niwa.co.nz/incoming/fauchereaun/ersst.realtime.nc
import xray
dset = xray.open_dataset('../data/ersst.realtime.nc')
dset
dsub = dset.sel(time=slice('1980','2014'), zlev=0, lat=slice(-40,40), lon=slice(120,290))
lat = dsub['lat'].values
lon = dsub['lon'].values
sst = dsub['anom'].values
sst.shape
X = np.reshape(sst, (sst.shape[0], len(lat) * len(lon)), order='F')
np.any(np.isnan(X))
type(X)
X = ma.masked_array(X, np.isnan(X))
type(X)
land = X.sum(0).mask
ocean = -land
X = X[:,ocean]
sklearn.preprocessing.scaler.StandardScaler
¶from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler_sst = scaler.fit(X)
from sklearn.externals import joblib
joblib.dump(scaler_sst, '../data/scaler_sst.pkl', compress=9)
scaler_sst = joblib.load('../data/scaler_sst.pkl')
transform
method of the scaler object¶X = scaler_sst.transform(X)
X.mean()
X.std()
X.shape
from sklearn.decomposition import pca
skpca = pca.PCA()
skpca.fit(X)
joblib.dump(skpca, '../data/EOF.pkl', compress=9)
from matplotlib import style
style.use('fivethirtyeight')
style.available
f, ax = plt.subplots(figsize=(6,6))
ax.plot(skpca.explained_variance_ratio_[0:10]*100)
ax.plot(skpca.explained_variance_ratio_[0:10]*100,'ro')
ipc = np.where(skpca.explained_variance_ratio_.cumsum() >= 0.70)[0][0]
ipc
transform
method of the pca
object (skpca
)¶PCs = skpca.transform(X)
PCs = PCs[:,:ipc]
components_
attribute of the pca
object (skpca
)¶EOFs = skpca.components_
EOFs = EOFs[:ipc,:]
EOFs.shape
EOF_recons = np.ones((ipc, len(lat) * len(lon))) * -999.
for i in xrange(ipc):
EOF_recons[i,ocean] = EOFs[i,:]
EOF_recons = ma.masked_values(np.reshape(EOF_recons, (ipc, len(lat), len(lon)), order='F'), -999.)
EOF_recons.shape
plt.imshow(EOF_recons[0,:,:], origin='lower', interpolation='nearest', aspect='auto')
plt.colorbar();
from sklearn.preprocessing import StandardScaler
scaler_PCs = StandardScaler()
scaler_PCs.fit(PCs)
PCs_std = scaler_PCs.transform(PCs)
joblib.dump(scaler_PCs, '../data/scaler_PCs.pkl')
PCdf = pd.DataFrame(PCs_std, index = dsub['time'], \
columns = ["EOF%s" % (x) for x in xrange(1, PCs_std.shape[1] +1)])
PCdf.head()
PCdf.to_csv('../data/EOF_ERSST_PCs.csv')
from scipy.signal import detrend
f, ax = plt.subplots(figsize=(12,6))
PCdf.ix[:,0].plot(ax=ax, color='k', label='PC1')
#ax.set_xlabel('period', fontsize=18)
ax.plot(PCdf.index, detrend(PCdf.ix[:,0].values), 'r', label='PC1 (trend removed)')
ax.grid('off')
ax.legend(loc=1);
!ipython nbconvert sklearn_EOF_decomposition.ipynb --to html