# '%matplotlib inline' is an IPython/Jupyter magic, not Python syntax; in a
# plain .py file it raises SyntaxError, so keep it only as a comment.
# %matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Build a reproducible synthetic time series: a smooth deterministic signal
# (product/sum of sinusoids) plus a random-walk noise component (cumulative
# sum of Gaussian steps), then plot both on a fixed y-range.
rng = np.random.RandomState(0)  # fixed seed for reproducibility
n_steps = 1000
time = np.arange(n_steps)
data_deterministic = (
    np.sin(time / 10.)
    + 0.7 * np.sin(time / 12. + 5) * np.sin(time / 5.))
# Random walk (cumsum of i.i.d. normals) gives a slowly drifting trend.
data_noisy = (
    data_deterministic
    + 0.2 * np.cumsum(rng.normal(scale=0.3, size=n_steps)))
plt.plot(data_deterministic)
plt.plot(data_noisy)
plt.ylim(-4, 4)
# NOTE(review): removed the stray '(-4, 4)' line — it was pasted notebook
# output of plt.ylim, a dead expression in a script.
# First-order differencing removes the slowly varying random-walk trend so
# the noisy series becomes (approximately) stationary; plot a 100-sample
# window of both differenced series for visual comparison.
diff_deterministic = np.diff(data_deterministic)
diff_noisy = np.diff(data_noisy)
plt.plot(diff_deterministic[100:200])
plt.plot(diff_noisy[100:200])
# NOTE(review): removed '[<matplotlib.lines.Line2D at 0x...>]' — pasted
# notebook output; it is a SyntaxError in plain Python.
# Frame 1-step-ahead forecasting of the differenced series as supervised
# regression: target is diff_noisy[t], features are the four previous
# values diff_noisy[t-1] ... diff_noisy[t-4].
# With n_steps = 1000 the resulting shapes are features (995, 4), target (995,).
# NOTE(review): dropped 'from scipy.ndimage import filters' — it was unused,
# and the scipy.ndimage.filters namespace is deprecated/removed in modern
# SciPy.  Also removed the interactive '.shape' probes and their pasted
# outputs ((995, 1), (995, 4), (995,)): dead expressions in a script.
target = diff_noisy[4:]
features = np.hstack([
    diff_noisy[3:-1].reshape(-1, 1),  # lag 1
    diff_noisy[2:-2].reshape(-1, 1),  # lag 2
    diff_noisy[1:-3].reshape(-1, 1),  # lag 3
    diff_noisy[0:-4].reshape(-1, 1),  # lag 4
])
# Compare four regressors on the 1-step-ahead forecasting task with 5-fold
# cross-validation.  Scores use scikit-learn's negated-MSE convention
# (higher, i.e. closer to 0, is better).  The '~' comments record the values
# observed in the original notebook run.
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed in scikit-learn 0.20;
# cross_val_score now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score

# 'mean_squared_error' was renamed 'neg_mean_squared_error' in sklearn 0.18;
# the old name was already negated, so values are unchanged.
scores = cross_val_score(LinearRegression(), features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('LinearRegression:', np.mean(scores), np.std(scores))
# ~ (-0.00569, 0.00042)

from sklearn.ensemble import ExtraTreesRegressor
scores = cross_val_score(ExtraTreesRegressor(n_estimators=10),
                         features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('ExtraTreesRegressor:', np.mean(scores), np.std(scores))
# ~ (-0.00648, 0.00064)

from sklearn.ensemble import GradientBoostingRegressor
scores = cross_val_score(GradientBoostingRegressor(n_estimators=5, max_depth=3),
                         features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('GradientBoostingRegressor:', np.mean(scores), np.std(scores))
# ~ (-0.00895, 0.00085)

from sklearn.svm import SVR
scores = cross_val_score(SVR(gamma=0.01, C=.1), features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('SVR:', np.mean(scores), np.std(scores))
# ~ (-0.01069, 0.00088)