이 노트북의 코드에 대한 설명은 'Pipeline에서 캐싱을 사용하기' 글을 참고하세요.
%load_ext watermark
%watermark -v -p sklearn,numpy,scipy,matplotlib
CPython 3.5.6 IPython 6.5.0 sklearn 0.20.1 numpy 1.15.2 scipy 1.1.0 matplotlib 3.0.0
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
# Load the Boston housing data and split it into train/test parts.
# random_state=0 fixes the shuffle so the split is repeatable.
# NOTE(review): load_boston was removed in later scikit-learn releases;
# this notebook targets 0.20.1.
boston = load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=0,
)

# Search grid: 5 polynomial degrees x 6 ridge penalties = 30 candidates.
# Keys are "<step name>__<parameter>", where make_pipeline derives each step
# name from the lower-cased estimator class name.
param_grid = dict(
    polynomialfeatures__degree=[1, 2, 3, 4, 5],
    ridge__alpha=[0.001, 0.01, 0.1, 1, 10, 100],
)

# Baseline pipeline without caching: scaling -> polynomial features -> ridge.
pipe = make_pipeline(StandardScaler(), PolynomialFeatures(), Ridge())

# 5-fold cross-validated grid search; n_jobs=-1 requests all available cores.
# NOTE(review): `iid` was removed in later scikit-learn releases.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    iid=False,
)
%timeit grid.fit(X_train, y_train)
8.35 s ± 1.44 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
from tempfile import mkdtemp
from shutil import rmtree
# Fresh temporary directory that will hold the pipeline's on-disk cache.
cache_dir = mkdtemp()

# Same three steps as the uncached pipeline; `memory=cache_dir` lets the
# pipeline cache fitted transformers in that directory, so a transformer
# fitted with the same parameters on the same data can be reused rather
# than refitted.
pipe2 = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(),
    Ridge(),
    memory=cache_dir,
)

# Identical search settings to the uncached run so the timings are comparable.
grid2 = GridSearchCV(
    estimator=pipe2,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    iid=False,
)
%timeit grid2.fit(X_train, y_train)
4.89 s ± 981 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Clean up: delete the temporary cache directory created with mkdtemp()
# together with everything stored inside it.
rmtree(cache_dir)