To the uninitiated, the whole pile of Python stuff looks terribly complicated.
To some extent it is.
But there has been a ton of work done to bring order out of the apparent chaos!
from tpot import TPOTClassifier
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
# Load the iris dataset that ships with scikit-learn.
iris = load_iris()

# Hold out 25% of the samples for the final score. The split is seeded so the
# run is repeatable: seeding only TPOTClassifier would still leave the
# train/test partition (and therefore the printed score) different on every
# execution.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data.astype(np.float64),
    iris.target.astype(np.float64),
    train_size=0.75,
    test_size=0.25,
    random_state=2,
)

# Run the TPOT search: 7 generations with a population of 100 pipelines.
tpot = TPOTClassifier(generations=7, population_size=100, verbosity=2, random_state=2)
tpot.fit(X_train, y_train)

# Score the fitted optimizer on the held-out test set.
print(tpot.score(X_test, y_test))

# Export the selected pipeline to a Python file.
tpot.export('tpot_iris_pipeline.py')
/home/tom/anaconda3/envs/py36n/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning) Optimization Progress: 23%|██▎ | 182/800 [00:23<01:10, 8.77pipeline/s]
Generation 1 - Current best internal CV score: 0.9730848861283643
Optimization Progress: 35%|███▍ | 278/800 [00:47<01:26, 6.01pipeline/s]
Generation 2 - Current best internal CV score: 0.9821428571428571
Optimization Progress: 46%|████▋ | 372/800 [00:58<00:25, 16.66pipeline/s]
Generation 3 - Current best internal CV score: 0.9821428571428571
Optimization Progress: 58%|█████▊ | 464/800 [01:15<01:00, 5.51pipeline/s]
Generation 4 - Current best internal CV score: 0.9821428571428571
Optimization Progress: 70%|██████▉ | 556/800 [01:37<00:22, 10.80pipeline/s]
Generation 5 - Current best internal CV score: 0.9904761904761905
Optimization Progress: 80%|████████ | 642/800 [01:48<00:16, 9.72pipeline/s]
Generation 6 - Current best internal CV score: 0.9904761904761905
Generation 7 - Current best internal CV score: 0.9904761904761905 Best pipeline: DecisionTreeClassifier(RBFSampler(XGBClassifier(input_matrix, XGBClassifier__learning_rate=1.0, XGBClassifier__max_depth=DEFAULT, XGBClassifier__min_child_weight=20, XGBClassifier__n_estimators=100, XGBClassifier__nthread=1, XGBClassifier__subsample=0.95), RBFSampler__gamma=0.35), DecisionTreeClassifier__criterion=entropy, DecisionTreeClassifier__max_depth=DEFAULT, DecisionTreeClassifier__min_samples_leaf=15, DecisionTreeClassifier__min_samples_split=10) 0.894736842105
# Build a DataFrame from the `evaluated_individuals_` attribute of the fitted
# TPOT object, then preview its first rows.
proc = pd.DataFrame(data=tpot.evaluated_individuals_)
proc.head(n=5)
BernoulliNB(BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=DEFAULT), BernoulliNB__alpha=0.01, BernoulliNB__fit_prior=DEFAULT) | BernoulliNB(DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=5, DecisionTreeClassifier__min_samples_leaf=20, DecisionTreeClassifier__min_samples_split=10), BernoulliNB__alpha=100.0, BernoulliNB__fit_prior=True) | BernoulliNB(DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=7, DecisionTreeClassifier__min_samples_leaf=2, DecisionTreeClassifier__min_samples_split=3), BernoulliNB__alpha=0.01, BernoulliNB__fit_prior=DEFAULT) | BernoulliNB(GaussianNB(input_matrix), BernoulliNB__alpha=0.1, BernoulliNB__fit_prior=DEFAULT) | BernoulliNB(LogisticRegression(input_matrix, LogisticRegression__C=DEFAULT, LogisticRegression__dual=DEFAULT, LogisticRegression__penalty=l1), BernoulliNB__alpha=0.1, BernoulliNB__fit_prior=False) | BernoulliNB(Normalizer(input_matrix, Normalizer__norm=l2), BernoulliNB__alpha=0.1, BernoulliNB__fit_prior=DEFAULT) | BernoulliNB(Normalizer(input_matrix, Normalizer__norm=max), BernoulliNB__alpha=0.001, BernoulliNB__fit_prior=True) | BernoulliNB(RobustScaler(input_matrix), BernoulliNB__alpha=1.0, BernoulliNB__fit_prior=False) | BernoulliNB(RobustScaler(input_matrix), BernoulliNB__alpha=100.0, BernoulliNB__fit_prior=DEFAULT) | BernoulliNB(SelectFromModel(input_matrix, SelectFromModel__ExtraTreesClassifier__criterion=DEFAULT, SelectFromModel__ExtraTreesClassifier__max_features=DEFAULT, SelectFromModel__ExtraTreesClassifier__n_estimators=100, SelectFromModel__threshold=0.2), BernoulliNB__alpha=1.0, BernoulliNB__fit_prior=False) | ... 
| XGBClassifier(input_matrix, XGBClassifier__learning_rate=0.5, XGBClassifier__max_depth=2, XGBClassifier__min_child_weight=4, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.95) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=0.5, XGBClassifier__max_depth=2, XGBClassifier__min_child_weight=4, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=DEFAULT) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=0.5, XGBClassifier__max_depth=3, XGBClassifier__min_child_weight=18, XGBClassifier__n_estimators=100, XGBClassifier__nthread=1, XGBClassifier__subsample=0.7) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=0.5, XGBClassifier__max_depth=5, XGBClassifier__min_child_weight=17, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.25) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=1.0, XGBClassifier__max_depth=1, XGBClassifier__min_child_weight=19, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.8) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=1.0, XGBClassifier__max_depth=1, XGBClassifier__min_child_weight=6, XGBClassifier__n_estimators=100, XGBClassifier__nthread=1, XGBClassifier__subsample=1.0) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=1.0, XGBClassifier__max_depth=2, XGBClassifier__min_child_weight=4, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.95) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=1.0, XGBClassifier__max_depth=2, XGBClassifier__min_child_weight=6, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.95) | XGBClassifier(input_matrix, XGBClassifier__learning_rate=DEFAULT, XGBClassifier__max_depth=5, XGBClassifier__min_child_weight=17, XGBClassifier__n_estimators=DEFAULT, XGBClassifier__nthread=1, XGBClassifier__subsample=0.25) | 
XGBClassifier(input_matrix, XGBClassifier__learning_rate=DEFAULT, XGBClassifier__max_depth=DEFAULT, XGBClassifier__min_child_weight=19, XGBClassifier__n_estimators=100, XGBClassifier__nthread=1, XGBClassifier__subsample=0.45) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2.000000 | 2.000000 | 2.00000 | 2.000000 | 2.000000 | 2.000000 | 2.000000 | 2.000000 | 2.000000 | 2.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.00000 | 1.00000 | 1.00000 | 1.000000 |
1 | 0.366511 | 0.705642 | 0.93618 | 0.705642 | 0.705642 | 0.366511 | 0.366511 | 0.777433 | 0.705642 | 0.366511 | ... | 0.944876 | 0.944876 | 0.358178 | 0.33913 | 0.433178 | 0.944876 | 0.93618 | 0.93618 | 0.33913 | 0.366511 |
2 rows × 626 columns
+ Initial data exploration: http://localhost:8889/notebooks/Documents/InfluenceH/Working_copies/Cond_fcast_wkg/ccsProfileInitialanalyis.ipynb
+ Current model:
http://localhost:8889/notebooks/Documents/InfluenceH/Working_copies/Cond_fcast_wkg/WIPNNModelonehottarget2.ipynb#
This notebook, rendered on Jupyter nbviewer: http://nbviewer.jupyter.org/github/dartdog/ML-lunch/blob/master/ML_resources.ipynb
R vs Python (2 pages): http://www.kdnuggets.com/2017/06/ecosystem-data-science-machine-learning-software.html
!nvidia-smi
Tue Jun 27 07:47:13 2017 +-----------------------------------------------------------------------------+ | NVIDIA-SMI 375.66 Driver Version: 375.66 | |-------------------------------+----------------------+----------------------+ | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | |===============================+======================+======================| | 0 GeForce GTX 1070 Off | 0000:01:00.0 On | N/A | | N/A 45C P8 10W / N/A | 623MiB / 8105MiB | 0% Default | +-------------------------------+----------------------+----------------------+ +-----------------------------------------------------------------------------+ | Processes: GPU Memory | | GPU PID Type Process name Usage | |=============================================================================| | 0 1098 G /usr/lib/xorg/Xorg 282MiB | | 0 2345 G compiz 65MiB | | 0 2767 G ...anced GL_KHR_blend_equation_advanced_cohe 222MiB | | 0 10292 G ...s-passed-by-fd --v8-snapshot-passed-by-fd 50MiB | +-----------------------------------------------------------------------------+
!nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2016 NVIDIA Corporation Built on Wed_May__4_21:01:56_CDT_2016 Cuda compilation tools, release 8.0, V8.0.26
!cat /proc/driver/nvidia/version
NVRM version: NVIDIA UNIX x86_64 Kernel Module 375.66 Mon May 1 15:29:16 PDT 2017 GCC version: gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.4)