from sklearn.datasets import make_regression,make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Build a small synthetic classification problem: 100 samples with 10
# features, 2 of which are informative. The generator is seeded so the data
# is identical from run to run; seeding only train_test_split would fix the
# split but not the data being split.
X, y = make_classification(
    n_samples=100, n_features=10, n_informative=2, random_state=42
)
# Hold out 33% of the samples for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
# Print the shapes explicitly: a bare expression only echoes in a REPL or
# notebook and shows nothing when this file is run as a script.
# Expected output: (67, 10) (33, 10) (67,) (33,)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Pipeline is configured with an ordered list of (name, estimator) tuples.
# Here the features go through a StandardScaler step and then into a
# LogisticRegression classifier.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression()),
    ]
)
# Use the pipeline object exactly as you would a regular classifier:
# fit on the training split, then predict on the held-out split.
pipeline.fit(X_train, y_train)
# NOTE: the notebook echo of the fitted pipeline's repr used to be pasted here
# as live code. It built a second, unused Pipeline with version-specific
# constructor arguments (e.g. `multi_class`), which may fail on scikit-learn
# releases that no longer accept them, so it has been removed.
y_preds = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_preds)
# Print the score explicitly so it is visible when run as a script.
# An earlier run recorded 0.8484848... (28 of 33 test samples correct); the
# exact value depends on the generated data and the scikit-learn version.
print(accuracy)