#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Load the California Housing dataset as pandas DataFrames and preview
# the first few rows of features and target together.
import pandas as pd
import lale.datasets

(train_X, train_y), (test_X, test_y) = lale.datasets.california_housing_df()
pd.concat([train_X.head(), train_y.head()], axis=1)


# In[2]:


# Import candidate operators; wrap_imported_operators() lets the plain
# scikit-learn and XGBoost classes participate in Lale pipeline combinators.
from sklearn.preprocessing import StandardScaler as Scale
from sklearn.preprocessing import Normalizer as Norm
from lale.lib.lale import NoOp
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor as Tree
from sklearn.linear_model import LinearRegression as Linear
from xgboost import XGBRegressor as XGB

lale.wrap_imported_operators()


# In[3]:


# Define a planned pipeline: a choice of scaler (or none), optional PCA, and
# a choice of regressor. `|` expresses algorithmic choice, `>>` dataflow.
planned_pipeline = (Scale | Norm | NoOp) >> (PCA | NoOp) >> (Tree | Linear | XGB)
planned_pipeline.visualize()


# In[4]:


# Let Hyperopt jointly select operators and tune their hyperparameters,
# with a 3-minute overall budget, 30 seconds per trial, and 3-fold CV.
from lale.lib.lale import Hyperopt
import sklearn.metrics

r2 = sklearn.metrics.make_scorer(sklearn.metrics.r2_score)
trained_pipeline = planned_pipeline.auto_configure(
    train_X, train_y, optimizer=Hyperopt, scoring=r2,
    max_opt_time=3*60, max_eval_time=30, cv=3)


# In[5]:


# Evaluate the best pipeline on the held-out test set and visualize the
# operators that were chosen.
print(f'R2 score: {r2(trained_pipeline, test_X, test_y):.2f}')
trained_pipeline.visualize()


# In[6]:


# Display the trained pipeline as equivalent Python code.
trained_pipeline.pretty_print(ipython_display=True)
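

# In[ ]:


# A possible follow-up, not part of the original notebook: trained Lale
# pipelines expose the scikit-learn estimator interface, so predictions can
# also be obtained directly with predict() and scored with r2_score.
# The variable name `predictions` below is illustrative.
predictions = trained_pipeline.predict(test_X)
print(f'R2 score (via predict): {sklearn.metrics.r2_score(test_y, predictions):.2f}')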