%matplotlib inline
from sklearn import gaussian_process
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
snow = pd.read_csv('nash_snow.csv', na_values='T')[['Year_x', 'Total Snow_y']].dropna()
snow.columns = 'year', 'snowfall'
gp = gaussian_process.GaussianProcess(theta0=1e-2, nugget=1e-12)
gp.fit(np.atleast_2d(snow.year).T, np.array(snow.snowfall))
GaussianProcess(beta0=None, corr=<function squared_exponential at 0x105ffa488>, normalize=True, nugget=array(1e-12), optimizer='fmin_cobyla', random_start=1, random_state=<mtrand.RandomState object at 0x101951cd8>, regr=<function constant at 0x105ffa0c8>, storage_mode='full', theta0=array([[ 0.01]]), thetaL=None, thetaU=None, verbose=False)
# Make the prediction on the meshed x-axis (ask for MSE as well)
x = np.arange(1886, 2015)
y_pred, MSE = gp.predict(np.atleast_2d(x).T, eval_MSE=True)
sigma = np.sqrt(MSE)
plt.figure(figsize=(12,4))
plt.plot(snow.year, snow.snowfall, 'ro')
plt.plot(x, y_pred, 'b-', label=u'Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
np.concatenate([y_pred - 1.9600 * sigma,
(y_pred + 1.9600 * sigma)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% confidence interval')
plt.ylim(0,40)
(0, 40)