This is a simple notebook demo illustrating how OptimalFlow's autoCV module typically works with classification problems.

In [ ]:
# Install external packages in binder environment.
!pip install xgboost
In [1]:
# Classification Demo 
import pandas as pd
from optimalflow.autoCV import dynaClassifier,evaluate_model
import joblib

# Load the pre-split train / validation / test sets from CSV.
# (The test split is loaded here but not used further in this cell.)
tr_features = pd.read_csv('./data/classification/train_features.csv')
tr_labels = pd.read_csv('./data/classification/train_labels.csv')
val_features = pd.read_csv('./data/classification/val_features.csv')
val_labels = pd.read_csv('./data/classification/val_labels.csv')
te_features = pd.read_csv('./data/classification/test_features.csv')
te_labels = pd.read_csv('./data/classification/test_labels.csv')

# Short codes of the estimators passed to dynaClassifier; the same codes are
# reused below to locate the pickled models.
custom_ml = ['lgr','svm','mlp','rf','ada','gb','xgb','lsvc','sgd','hgboost','rgcv']

# Model selection with 5 cross-validation folds and random_state fixed at 13.
clf_cv_demo = dynaClassifier(custom_estimators = custom_ml,random_state = 13,cv_num = 5)

clf_cv_demo.fit(tr_features,tr_labels)

# Load one pickled model per estimator code from ./pkl/.
# NOTE(review): presumably these files are written by clf_cv_demo.fit() above -- confirm.
# SECURITY: joblib.load unpickles arbitrary objects; only load files you trust.
models = {mdl: joblib.load('./pkl/{}_clf_model.pkl'.format(mdl)) for mdl in custom_ml}

# Evaluate every loaded model on the validation split. A failure for one
# model is reported and the loop continues with the next one.
for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type = "cls")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as err:
        # `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupt still
        # stops the loop; report the estimator code and the actual error.
        print(f"Failed to evaluate the {name} model: {err!r}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left  [####################] 100.0%

    *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:
Best Parameters: {'fit_intercept': 'False'}

Best CV Score: 0.803456180567801

lgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 1.0s
svm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.5s
mlp -- Accuracy: 0.787 / Precision: 0.745 / Recall: 0.631 / Latency: 1.0s
rf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 45.6s
ada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 20.6s
gb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 3.2s
xgb -- Accuracy: 0.815 / Precision: 0.786 / Recall: 0.677 / Latency: 2.0s
lsvc -- Accuracy: 0.753 / Precision: 0.667 / Recall: 0.646 / Latency: 4.1s
sgd -- Accuracy: 0.775 / Precision: 0.658 / Recall: 0.8 / Latency: 0.0s
hgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 9.1s
rgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 1.0s
In [1]:
# Fast Classification Demo (uses fastClassifier)
import pandas as pd
from optimalflow.autoCV import fastClassifier,evaluate_model
import joblib

# Load the pre-split train / validation / test sets from CSV.
# (The test split is loaded here but not used further in this cell.)
tr_features = pd.read_csv('./data/classification/train_features.csv')
tr_labels = pd.read_csv('./data/classification/train_labels.csv')
val_features = pd.read_csv('./data/classification/val_features.csv')
val_labels = pd.read_csv('./data/classification/val_labels.csv')
te_features = pd.read_csv('./data/classification/test_features.csv')
te_labels = pd.read_csv('./data/classification/test_labels.csv')

# Short codes of the estimators passed to fastClassifier; the same codes are
# reused below to locate the pickled models.
custom_ml = ['lgr','svm','mlp','rf','ada','gb','xgb','lsvc','sgd','hgboost','rgcv']

# Model selection with 5 cross-validation folds and random_state fixed at 13.
# NOTE(review): n_comb presumably caps the number of parameter combinations
# tried per estimator -- confirm against the OptimalFlow documentation.
clf_cv_demo = fastClassifier(custom_estimators = custom_ml,random_state = 13,cv_num = 5,n_comb = 12)

clf_cv_demo.fit(tr_features,tr_labels)

# Load one pickled model per estimator code from ./pkl/.
# NOTE(review): presumably these files are written by clf_cv_demo.fit() above -- confirm.
# SECURITY: joblib.load unpickles arbitrary objects; only load files you trust.
models = {mdl: joblib.load('./pkl/{}_clf_model.pkl'.format(mdl)) for mdl in custom_ml}

# Evaluate every loaded model on the validation split. A failure for one
# model is reported and the loop continues with the next one.
for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type = "cls")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as err:
        # `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupt still
        # stops the loop; report the estimator code and the actual error.
        print(f"Failed to evaluate the {name} model: {err!r}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left  [####################] 100.0%

    *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:
Best Parameters: {'fit_intercept': 'False'}

Best CV Score: 0.803456180567801

lgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 3.0s
svm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.4s
mlp -- Accuracy: 0.77 / Precision: 0.7 / Recall: 0.646 / Latency: 5.6s
rf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 38.2s
ada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 21.1s
gb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 4.0s
xgb -- Accuracy: 0.82 / Precision: 0.811 / Recall: 0.662 / Latency: 2.0s
lsvc -- Accuracy: 0.747 / Precision: 0.661 / Recall: 0.631 / Latency: 5.2s
sgd -- Accuracy: 0.64 / Precision: 0.6 / Recall: 0.046 / Latency: 0.0s
hgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 5.3s
rgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 6.3s