# '%matplotlib inline' is an IPython/Jupyter magic, not Python syntax; in a
# plain .py file it raises SyntaxError, so keep it only as a comment.
# %matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Build a reproducible synthetic time series: a smooth deterministic signal
# (product/sum of sinusoids) plus a random-walk noise component (cumulative
# sum of Gaussian steps), then plot both on a fixed y-range.
rng = np.random.RandomState(0)  # fixed seed for reproducibility
n_steps = 1000
time = np.arange(n_steps)
data_deterministic = (
    np.sin(time / 10.)
    + 0.7 * np.sin(time / 12. + 5) * np.sin(time / 5.))
# Random walk (cumsum of i.i.d. normals) gives a slowly drifting trend.
data_noisy = (
    data_deterministic
    + 0.2 * np.cumsum(rng.normal(scale=0.3, size=n_steps)))
plt.plot(data_deterministic)
plt.plot(data_noisy)
plt.ylim(-4, 4)
# NOTE(review): removed the stray '(-4, 4)' line — it was pasted notebook
# output of plt.ylim, a dead expression in a script.
# First-order differencing removes the slowly varying random-walk trend so
# the noisy series becomes (approximately) stationary; plot a 100-sample
# window of both differenced series for visual comparison.
diff_deterministic = np.diff(data_deterministic)
diff_noisy = np.diff(data_noisy)
plt.plot(diff_deterministic[100:200])
plt.plot(diff_noisy[100:200])
# NOTE(review): removed '[<matplotlib.lines.Line2D at 0x...>]' — pasted
# notebook output; it is a SyntaxError in plain Python.
# Frame 1-step-ahead forecasting of the differenced series as supervised
# regression: target is diff_noisy[t], features are the four previous
# values diff_noisy[t-1] ... diff_noisy[t-4].
# With n_steps = 1000 the resulting shapes are features (995, 4), target (995,).
# NOTE(review): dropped 'from scipy.ndimage import filters' — it was unused,
# and the scipy.ndimage.filters namespace is deprecated/removed in modern
# SciPy.  Also removed the interactive '.shape' probes and their pasted
# outputs ((995, 1), (995, 4), (995,)): dead expressions in a script.
target = diff_noisy[4:]
features = np.hstack([
    diff_noisy[3:-1].reshape(-1, 1),  # lag 1
    diff_noisy[2:-2].reshape(-1, 1),  # lag 2
    diff_noisy[1:-3].reshape(-1, 1),  # lag 3
    diff_noisy[0:-4].reshape(-1, 1),  # lag 4
])
# Compare four regressors on the 1-step-ahead forecasting task with 5-fold
# cross-validation.  Scores use scikit-learn's negated-MSE convention
# (higher, i.e. closer to 0, is better).  The '~' comments record the values
# observed in the original notebook run.
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed in scikit-learn 0.20;
# cross_val_score now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score

# 'mean_squared_error' was renamed 'neg_mean_squared_error' in sklearn 0.18;
# the old name was already negated, so values are unchanged.
scores = cross_val_score(LinearRegression(), features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('LinearRegression:', np.mean(scores), np.std(scores))
# ~ (-0.00569, 0.00042)

from sklearn.ensemble import ExtraTreesRegressor
scores = cross_val_score(ExtraTreesRegressor(n_estimators=10),
                         features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('ExtraTreesRegressor:', np.mean(scores), np.std(scores))
# ~ (-0.00648, 0.00064)

from sklearn.ensemble import GradientBoostingRegressor
scores = cross_val_score(GradientBoostingRegressor(n_estimators=5, max_depth=3),
                         features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('GradientBoostingRegressor:', np.mean(scores), np.std(scores))
# ~ (-0.00895, 0.00085)

from sklearn.svm import SVR
scores = cross_val_score(SVR(gamma=0.01, C=.1), features, target, cv=5,
                         scoring='neg_mean_squared_error')
print('SVR:', np.mean(scores), np.std(scores))
# ~ (-0.01069, 0.00088)