import ROOT
Welcome to JupyROOT 6.09/01
# Select Theano as backend for Keras.
# Both environment variables are assigned before the keras imports below;
# keep that order (presumably they are read when Keras/Theano are first
# imported -- the "Using Theano backend." banner appears right after this cell).
from os import environ
environ['KERAS_BACKEND'] = 'theano'
# Set architecture of system (AVX instruction set is not supported on SWAN).
# Theano is handed an explicit -march flag for its g++ compilation; it warns
# about this later on, which is expected here.
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import Adam
Using Theano backend.
# Open file
inputUrl = 'https://raw.githubusercontent.com/iml-wg/tmvatutorials/master/inputdata.root'
data = ROOT.TFile.Open(inputUrl)
# TFile.Open gives back a null pointer (or a zombie file) when the download or
# the open fails; stop here with a clear message instead of failing on Get().
if not data or data.IsZombie():
    raise IOError('Could not open input file: {}'.format(inputUrl))

# Get signal and background trees from file
signal = data.Get('TreeS')
background = data.Get('TreeB')
# Get() yields a null object when the key is missing from the file
if not signal:
    raise IOError('Tree "TreeS" not found in {}'.format(inputUrl))
if not background:
    raise IOError('Tree "TreeB" not found in {}'.format(inputUrl))

# Add variables to dataloader: every branch of the signal tree becomes one
# input variable. The names are collected once so that numVariables (used
# later as the input dimension of the network) always matches what was
# registered with the dataloader.
dataloader = ROOT.TMVA.DataLoader('dataset_pymva')
branchNames = [branch.GetName() for branch in signal.GetListOfBranches()]
numVariables = len(branchNames)
for branchName in branchNames:
    dataloader.AddVariable(branchName)

# Add trees to dataloader (second argument is the global tree weight)
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)

# Same train/test fraction for both classes, events assigned at random
trainTestSplit = 0.8
splitOptions = (
    'TrainTestSplit_Signal={}:'.format(trainTestSplit)
    + 'TrainTestSplit_Background={}:'.format(trainTestSplit)
    + 'SplitMode=Random'
)
dataloader.PrepareTrainingAndTestTree(ROOT.TCut(''), splitOptions)
DataSetInfo : [dataset_pymva] : Added class "Signal" : Add Tree TreeS of type Signal with 6000 events DataSetInfo : [dataset_pymva] : Added class "Background" : Add Tree TreeB of type Background with 6000 events : Dataset[dataset_pymva] : Class index : 0 name : Signal : Dataset[dataset_pymva] : Class index : 1 name : Background
# Setup TMVA: initialise the TMVA tools and the embedded Python interpreter
# used by the PyMVA methods, then create the factory that writes its results
# to a fresh output file.
ROOT.TMVA.Tools.Instance()
ROOT.TMVA.PyMethodBase.PyInitialize()
outputFile = ROOT.TFile.Open('TMVAOutputPyMVA.root', 'RECREATE')
factoryOptions = (
    '!V:!Silent:Color:DrawProgressBar:Transformations=I,G:'
    + 'AnalysisType=Classification'
)
factory = ROOT.TMVA.Factory('TMVAClassification', outputFile, factoryOptions)
# Define model: two hidden ReLU layers of 32 units, each followed by 50%
# dropout, and a two-node softmax output. The input width is the number of
# variables registered with the dataloader.
model = Sequential([
    Dense(32, init='glorot_normal', activation='relu',
          input_dim=numVariables),
    Dropout(0.5),
    Dense(32, init='glorot_normal', activation='relu'),
    Dropout(0.5),
    Dense(2, init='glorot_uniform', activation='softmax'),
])

# Set loss and optimizer
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['categorical_accuracy'],
)

# Store model to file; this file is what the PyKeras booking refers to
# through FilenameModel=model.h5
model.save('model.h5')

# Print summary of model
model.summary()
____________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ==================================================================================================== dense_1 (Dense) (None, 32) 160 dense_input_1[0][0] ____________________________________________________________________________________________________ dropout_1 (Dropout) (None, 32) 0 dense_1[0][0] ____________________________________________________________________________________________________ dense_2 (Dense) (None, 32) 1056 dropout_1[0][0] ____________________________________________________________________________________________________ dropout_2 (Dropout) (None, 32) 0 dense_2[0][0] ____________________________________________________________________________________________________ dense_3 (Dense) (None, 2) 66 dropout_2[0][0] ==================================================================================================== Total params: 1282 ____________________________________________________________________________________________________
WARNING (theano.gof.cmodule): WARNING: your Theano flags `gcc.cxxflags` specify an `-march=X` flags. It is better to let Theano/g++ find it automatically, but we don't do it now WARNING:theano.gof.cmodule:WARNING: your Theano flags `gcc.cxxflags` specify an `-march=X` flags. It is better to let Theano/g++ find it automatically, but we don't do it now
Just run the cells that contain the classifiers you want to try.
# Keras interface with previously defined model (read back from model.h5)
kerasOptions = (
    'H:!V:VarTransform=G:FilenameModel=model.h5:'
    + 'NumEpochs=10:BatchSize=32:'
    + 'TriesEarlyStopping=3'
)
factory.BookMethod(dataloader, ROOT.TMVA.Types.kPyKeras, 'PyKeras',
                   kerasOptions)
<ROOT.TMVA::MethodPyKeras object ("PyKeras") at 0x77e48b0>
Factory : Booking method: PyKeras
:
PyKeras : [dataset_pymva] : Create Transformation "G" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
: Load model from file: model.h5
# Gradient tree boosting from scikit-learn package
gtbOptions = (
    'H:!V:VarTransform=None:'
    + 'NEstimators=100:LearningRate=0.1:MaxDepth=3'
)
factory.BookMethod(dataloader, ROOT.TMVA.Types.kPyGTB, 'GTB', gtbOptions)
<ROOT.TMVA::MethodPyGTB object ("GTB") at 0x77c0a30>
Factory : Booking method: GTB
:
DataSetFactory : [dataset_pymva] : Number of events in input trees
:
:
: Dataset[dataset_pymva] : Weight renormalisation mode: "EqualNumEvents": renormalises all event classes ...
: Dataset[dataset_pymva] : such that the effective (weighted) number of events in each class is the same
: Dataset[dataset_pymva] : (and equals the number of events (entries) given for class=0 )
: Dataset[dataset_pymva] : ... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ...
: Dataset[dataset_pymva] : ... (note that N_j is the sum of TRAINING events
: Dataset[dataset_pymva] : ..... Testing events are not renormalised nor included in the renormalisation factor!)
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 4800
: Signal -- testing events : 1200
: Signal -- training and testing events: 6000
: Background -- training events : 4800
: Background -- testing events : 1200
: Background -- training and testing events: 6000
:
DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.379 +0.585 +0.813
: var2: +0.379 +1.000 +0.691 +0.727
: var3: +0.585 +0.691 +1.000 +0.848
: var4: +0.813 +0.727 +0.848 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (Background):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.852 +0.914 +0.964
: var2: +0.852 +1.000 +0.925 +0.935
: var3: +0.914 +0.925 +1.000 +0.970
: var4: +0.964 +0.935 +0.970 +1.000
: ----------------------------------------
DataSetFactory : [dataset_pymva] :
:
/cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:4: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew. /cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:4: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.
# Train every method that has been booked on the factory
factory.TrainAllMethods()
Factory : Train all methods Factory : [dataset_pymva] : Create Transformation "I" with events from all classes. : : Transformation, Variable selection : : Input : variable 'var1' <---> Output : variable 'var1' : Input : variable 'var2' <---> Output : variable 'var2' : Input : variable 'var3' <---> Output : variable 'var3' : Input : variable 'var4' <---> Output : variable 'var4' Factory : [dataset_pymva] : Create Transformation "G" with events from all classes. : : Transformation, Variable selection : : Input : variable 'var1' <---> Output : variable 'var1' : Input : variable 'var2' <---> Output : variable 'var2' : Input : variable 'var3' <---> Output : variable 'var3' : Input : variable 'var4' <---> Output : variable 'var4' : Preparing the Gaussian transformation... TFHandler_Factory : Variable Mean RMS [ Min Max ] : ----------------------------------------------------------- : var1: 0.0065519 0.99843 [ -3.1728 5.7307 ] : var2: 0.0068699 1.0010 [ -3.1728 5.7307 ] : var3: 0.0067702 1.0001 [ -3.1728 5.7307 ] : var4: 0.0066114 0.99911 [ -3.1728 5.7307 ] : ----------------------------------------------------------- : Ranking input variables (method unspecific)... Id_GaussTransformation : Ranking result (top variable is best ranked) : ----------------------------- : Rank : Variable : Separation : ----------------------------- : 1 : var4 : 3.445e-01 : 2 : var3 : 2.750e-01 : 3 : var1 : 2.670e-01 : 4 : var2 : 2.116e-01 : ----------------------------- Factory : Train method: PyKeras for Classification : : : ================================================================ : H e l p f o r M V A m e t h o d [ PyKeras ] : : : Keras is a high-level API for the Theano and Tensorflow packages. : This method wraps the training and predictions steps of the Keras : Python package for TMVA, so that dataloading, preprocessing and : evaluation can be done within the TMVA system. To use this Keras : interface, you have to generate a model with Keras first. 
Then, : this model can be loaded and trained in TMVA. : : : <Suppress this message by specifying "!H" in the booking option> : ================================================================ : : Preparing the Gaussian transformation... TFHandler_PyKeras : Variable Mean RMS [ Min Max ] : ----------------------------------------------------------- : var1: 0.0065519 0.99843 [ -3.1728 5.7307 ] : var2: 0.0068699 1.0010 [ -3.1728 5.7307 ] : var3: 0.0067702 1.0001 [ -3.1728 5.7307 ] : var4: 0.0066114 0.99911 [ -3.1728 5.7307 ] : ----------------------------------------------------------- : Option SaveBestOnly: Only model weights with smallest validation loss will be stored : Option TriesEarlyStopping: Training will stop after 3 number of epochs with no improvement of validation loss Train on 9600 samples, validate on 2400 samples Epoch 1/10 9376/9600 [============================>.] - ETA: 0s - loss: 0.6087 - categorical_accuracy: 0.6496Epoch 00000: val_loss improved from inf to 0.53422, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.6084 - categorical_accuracy: 0.6504 - val_loss: 0.5342 - val_categorical_accuracy: 0.7600 Epoch 2/10 9504/9600 [============================>.] - ETA: 0s - loss: 0.5125 - categorical_accuracy: 0.7488Epoch 00001: val_loss improved from 0.53422 to 0.43809, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.5119 - categorical_accuracy: 0.7492 - val_loss: 0.4381 - val_categorical_accuracy: 0.7975 Epoch 3/10 9440/9600 [============================>.] 
- ETA: 0s - loss: 0.4697 - categorical_accuracy: 0.7728Epoch 00002: val_loss improved from 0.43809 to 0.40123, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.4688 - categorical_accuracy: 0.7732 - val_loss: 0.4012 - val_categorical_accuracy: 0.8125 Epoch 4/10 9504/9600 [============================>.] - ETA: 0s - loss: 0.4404 - categorical_accuracy: 0.7942Epoch 00003: val_loss improved from 0.40123 to 0.37674, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.4401 - categorical_accuracy: 0.7946 - val_loss: 0.3767 - val_categorical_accuracy: 0.8421 Epoch 5/10 9248/9600 [===========================>..] - ETA: 0s - loss: 0.4240 - categorical_accuracy: 0.8005Epoch 00004: val_loss improved from 0.37674 to 0.37410, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.4237 - categorical_accuracy: 0.8004 - val_loss: 0.3741 - val_categorical_accuracy: 0.8433 Epoch 6/10 9280/9600 [============================>.] - ETA: 0s - loss: 0.4082 - categorical_accuracy: 0.8196Epoch 00005: val_loss improved from 0.37410 to 0.35731, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.4079 - categorical_accuracy: 0.8198 - val_loss: 0.3573 - val_categorical_accuracy: 0.8433 Epoch 7/10 9248/9600 [===========================>..] - ETA: 0s - loss: 0.3998 - categorical_accuracy: 0.8212Epoch 00006: val_loss improved from 0.35731 to 0.34590, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.4001 - categorical_accuracy: 0.8206 - val_loss: 0.3459 - val_categorical_accuracy: 0.8471 Epoch 8/10 9280/9600 [============================>.] 
- ETA: 0s - loss: 0.3886 - categorical_accuracy: 0.8292Epoch 00007: val_loss improved from 0.34590 to 0.34174, saving model to dataset_pymva/weights/TrainedModel_PyKeras.h5 9600/9600 [==============================] - 0s - loss: 0.3882 - categorical_accuracy: 0.8297 - val_loss: 0.3417 - val_categorical_accuracy: 0.8483 Epoch 9/10 9440/9600 [============================>.] - ETA: 0s - loss: 0.3905 - categorical_accuracy: 0.8263Epoch 00008: val_loss did not improve 9600/9600 [==============================] - 0s - loss: 0.3903 - categorical_accuracy: 0.8261 - val_loss: 0.3448 - val_categorical_accuracy: 0.8517 Epoch 10/10 9248/9600 [===========================>..] - ETA: 0s - loss: 0.3858 - categorical_accuracy: 0.8269Epoch 00009: val_loss did not improve 9600/9600 [==============================] - 0s - loss: 0.3846 - categorical_accuracy: 0.8276 - val_loss: 0.3449 - val_categorical_accuracy: 0.8462 : Elapsed time for training with 9600 events: 13.3 sec : Creating xml weight file: dataset_pymva/weights/TMVAClassification_PyKeras.weights.xml : Creating standalone class: dataset_pymva/weights/TMVAClassification_PyKeras.class.C Factory : Training finished : Factory : Train method: GTB for Classification : : : ================================================================ : H e l p f o r M V A m e t h o d [ GTB ] : : : --- Short description: : : Decision Trees and Rule-Based Models : : --- Performance optimisation: : : : --- Performance tuning via configuration options: : : <None> : : <Suppress this message by specifying "!H" in the booking option> : ================================================================ : ('deviance', 0.1, 100, 1.0, 2, 1, 0.0, 3, None, None, None, 0, None, 0) GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance', max_depth=3, max_features=None, max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, presort='auto', random_state=None, subsample=1.0, verbose=0, 
warm_start=0) : : --- Saving State File In:dataset_pymva/weights/PyGTBModel.PyData : : Elapsed time for training with 9600 events: 1.11 sec : Dataset[dataset_pymva] : Evaluation of GTB on training sample (9600 events) : Dataset[dataset_pymva] : Elapsed time for evaluation of 9600 events: 0.0345 sec : Creating xml weight file: dataset_pymva/weights/TMVAClassification_GTB.weights.xml : Creating standalone class: dataset_pymva/weights/TMVAClassification_GTB.class.C Factory : Training finished : : Ranking input variables (method specific)... : No variable ranking supplied by classifier: PyKeras : No variable ranking supplied by classifier: GTB Factory : === Destroy and recreate all methods via weight files for testing === :
/cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew. if __name__ == '__main__': /cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr. if __name__ == '__main__':
# Run the trained methods on the testing sample
factory.TestAllMethods()
Factory : Test all methods Factory : Test method: PyKeras for Classification performance : : Load model from file: dataset_pymva/weights/TrainedModel_PyKeras.h5 Factory : Test method: GTB for Classification performance : : : --- Loading State File From:dataset_pymva/weights/PyGTBModel.PyData : : Dataset[dataset_pymva] : Evaluation of GTB on testing sample (2400 events) : Dataset[dataset_pymva] : Elapsed time for evaluation of 2400 events: 0.00952 sec
/cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew. if __name__ == '__main__': /cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr. if __name__ == '__main__':
# Evaluate and compare the methods; the output ranks them by ROC integral
# and lists test vs. training signal efficiencies as an overtraining check
factory.EvaluateAllMethods()
Factory : Evaluate all methods Factory : Evaluate classifier: PyKeras : TFHandler_PyKeras : Variable Mean RMS [ Min Max ] : ----------------------------------------------------------- : var1: -0.019674 1.0126 [ -2.8208 5.7307 ] : var2: -0.025370 0.99752 [ -3.1672 5.7307 ] : var3: -0.025914 1.0079 [ -3.0141 5.7307 ] : var4: -0.023154 1.0059 [ -2.9557 5.7307 ] : ----------------------------------------------------------- PyKeras : [dataset_pymva] : Loop over test events and fill histograms with classifier response... : TFHandler_PyKeras : Variable Mean RMS [ Min Max ] : ----------------------------------------------------------- : var1: -0.019674 1.0126 [ -2.8208 5.7307 ] : var2: -0.025370 0.99752 [ -3.1672 5.7307 ] : var3: -0.025914 1.0079 [ -3.0141 5.7307 ] : var4: -0.023154 1.0059 [ -2.9557 5.7307 ] : ----------------------------------------------------------- Factory : Evaluate classifier: GTB : GTB : [dataset_pymva] : Loop over test events and fill histograms with classifier response... 
: TFHandler_GTB : Variable Mean RMS [ Min Max ] : ----------------------------------------------------------- : var1: -0.019646 1.6797 [ -4.8163 4.5708 ] : var2: -0.028834 1.5789 [ -5.2407 4.4671 ] : var3: -0.036699 1.7446 [ -5.2331 4.6430 ] : var4: 0.11995 2.1669 [ -6.3160 4.8976 ] : ----------------------------------------------------------- : : Evaluation results ranked by best signal efficiency and purity (area) : ------------------------------------------------------------------------------------------------------------------- : DataSet MVA : Name: Method: ROC-integ : dataset_pymva PyKeras : 0.928 : dataset_pymva GTB : 0.918 : ------------------------------------------------------------------------------------------------------------------- : : Testing efficiency compared to training efficiency (overtraining check) : ------------------------------------------------------------------------------------------------------------------- : DataSet MVA Signal efficiency: from test sample (from training sample) : Name: Method: @B=0.01 @B=0.10 @B=0.30 : ------------------------------------------------------------------------------------------------------------------- : dataset_pymva PyKeras : 0.357 (0.335) 0.737 (0.780) 0.963 (0.957) : dataset_pymva GTB : 0.295 (0.395) 0.733 (0.788) 0.947 (0.948) : ------------------------------------------------------------------------------------------------------------------- : Dataset:dataset_pymva : Created tree 'TestTree' with 2400 events : Dataset:dataset_pymva : Created tree 'TrainTree' with 9600 events : Factory : Thank you for using TMVA! : For citation information, please visit: http://tmva.sf.net/citeTMVA.html
/cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew. if __name__ == '__main__': /cvmfs/sft-nightlies.cern.ch/lcg/views/dev3/Sat/x86_64-slc6-gcc49-opt/lib/python2.7/site-packages/ipykernel/__main__.py:1: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr. if __name__ == '__main__':
# Enable Javascript for ROOT so that we can draw the canvas.
# NOTE: %jsroot is a notebook magic, not plain Python -- this cell is meant
# to be run inside the JupyROOT notebook.
%jsroot on
# Print ROC: ask the factory for the ROC-curve canvas of this dataloader
# and draw it
canvas = factory.GetROCCurve(dataloader)
canvas.Draw()