#!/usr/bin/env python
# coding: utf-8
#
# # Binary classification with PyMVA
#
# Books two classifiers through the TMVA PyMVA interface (a Keras neural
# network and gradient tree boosting from scikit-learn), trains, tests and
# evaluates them on the signal/background trees of the example input file,
# and finally draws the ROC curves.

# In[1]:

import ROOT

# In[2]:

# Select Theano as backend for Keras
from os import environ
environ['KERAS_BACKEND'] = 'theano'

# Set architecture of system (AVX instruction set is not supported on SWAN)
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'

# Imported after the backend selection above so that the settings are
# already in place when Keras is loaded.
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import Adam

# ## Load data

# In[3]:

# Open file
data = ROOT.TFile.Open(
    'https://raw.githubusercontent.com/iml-wg/tmvatutorials/master/inputdata.root')

# Get signal and background trees from file
signal = data.Get('TreeS')
background = data.Get('TreeB')

# Add variables to dataloader: every branch of the signal tree becomes one
# input variable, and the branch count is reused below as the input
# dimension of the Keras model.
dataloader = ROOT.TMVA.DataLoader('dataset_pymva')
numVariables = len(signal.GetListOfBranches())
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())

# Add trees to dataloader (the second argument is the tree weight)
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)

# The same split value is passed for signal and background, with an empty
# selection cut.
trainTestSplit = 0.8
dataloader.PrepareTrainingAndTestTree(
    ROOT.TCut(''),
    'TrainTestSplit_Signal={}:'.format(trainTestSplit)
    + 'TrainTestSplit_Background={}:'.format(trainTestSplit)
    + 'SplitMode=Random')

# ## Set up TMVA

# In[4]:

# Setup TMVA
ROOT.TMVA.Tools.Instance()
ROOT.TMVA.PyMethodBase.PyInitialize()

# Output file handed to the factory; recreated on every run.
outputFile = ROOT.TFile.Open('TMVAOutputPyMVA.root', 'RECREATE')
factory = ROOT.TMVA.Factory(
    'TMVAClassification', outputFile,
    '!V:!Silent:Color:DrawProgressBar:Transformations=I,G:'
    'AnalysisType=Classification')

# ## Define model for Keras

# In[5]:

# Define model: two hidden layers of 32 ReLU units, each followed by 50%
# dropout, and a 2-unit softmax output layer.
model = Sequential()
model.add(Dense(32, init='glorot_normal', activation='relu',
                input_dim=numVariables))
model.add(Dropout(0.5))
model.add(Dense(32, init='glorot_normal', activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, init='glorot_uniform', activation='softmax'))

# Set loss and optimizer
model.compile(loss='categorical_crossentropy', optimizer=Adam(),
              metrics=['categorical_accuracy',])

# Store model to file; the PyKeras method booked below refers to this file
# through its FilenameModel option.
model.save('model.h5')

# Print summary of model
model.summary()

# ## Book methods
#
# Just run the cells that contain the classifiers you want to try.

# In[6]:

# Keras interface with previously defined model
factory.BookMethod(dataloader, ROOT.TMVA.Types.kPyKeras, 'PyKeras',
                   'H:!V:VarTransform=G:FilenameModel=model.h5:'
                   'NumEpochs=10:BatchSize=32:'
                   'TriesEarlyStopping=3')

# In[7]:

# Gradient tree boosting from scikit-learn package
factory.BookMethod(dataloader, ROOT.TMVA.Types.kPyGTB, 'GTB',
                   'H:!V:VarTransform=None:'
                   'NEstimators=100:LearningRate=0.1:MaxDepth=3')

# ## Run training, testing and evaluation

# In[8]:

factory.TrainAllMethods()

# In[9]:

factory.TestAllMethods()

# In[10]:

factory.EvaluateAllMethods()

# ## Print ROC

# In[11]:

# Enable Javascript for ROOT so that we can draw the canvas.
# get_ipython is only defined inside an IPython/Jupyter session; when this
# file is executed as a plain Python script the name does not exist, so the
# magic is skipped instead of aborting with a NameError after training and
# evaluation have already completed.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None
if ipython_shell is not None:
    ipython_shell.run_line_magic('jsroot', 'on')

# Print ROC
canvas = factory.GetROCCurve(dataloader)
canvas.Draw()