#!/usr/bin/env python
# coding: utf-8
#
#
# # Binary classification with PyMVA
# In[1]:
import ROOT
# In[2]:
# Configure the Keras/Theano environment in one step, ahead of the Keras
# imports that follow:
#   KERAS_BACKEND - select Theano as backend for Keras
#   THEANO_FLAGS  - set architecture of system (AVX instruction set is not
#                   supported on SWAN)
from os import environ
environ.update(
    KERAS_BACKEND='theano',
    THEANO_FLAGS='gcc.cxxflags=-march=corei7',
)
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import Adam
# ## Load data
# In[3]:
# Open the remote ROOT file that holds the input trees
data = ROOT.TFile.Open('https://raw.githubusercontent.com/iml-wg/tmvatutorials/master/inputdata.root')
# TFile.Open hands back a null pointer (falsy in PyROOT) when the file cannot
# be opened, e.g. without network access. Stop here with a clear message
# rather than failing later on the first use of the file.
if not data or data.IsZombie():
    raise IOError('Could not open input file inputdata.root')
# Get signal and background trees from file
signal = data.Get('TreeS')
background = data.Get('TreeB')
# Get returns a null pointer for a key that is not present in the file
if not signal or not background:
    raise IOError("Input file does not contain both 'TreeS' and 'TreeB'")
# Add variables to dataloader: every branch of the signal tree becomes one
# input variable, registered under the branch name.
dataloader = ROOT.TMVA.DataLoader('dataset_pymva')
# The branch count is reused later as the input dimension of the Keras model
numVariables = len(signal.GetListOfBranches())
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())
# Attach the signal and background trees to the dataloader, each with weight 1.0
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
# The same split value is used for signal and background; no selection cut is
# applied (empty TCut) and the split mode is random.
trainTestSplit = 0.8
dataloader.PrepareTrainingAndTestTree(
    ROOT.TCut(''),
    ':'.join([
        'TrainTestSplit_Signal={}'.format(trainTestSplit),
        'TrainTestSplit_Background={}'.format(trainTestSplit),
        'SplitMode=Random',
    ]))
# ## Set up TMVA
# In[4]:
# Set up TMVA and initialise its Python-based methods (PyMVA)
ROOT.TMVA.Tools.Instance()
ROOT.TMVA.PyMethodBase.PyInitialize()
# Output ROOT file handed to the factory; 'RECREATE' replaces an existing file
outputFile = ROOT.TFile.Open('TMVAOutputPyMVA.root', 'RECREATE')
# Factory options, joined with ':' into a single option string
factory = ROOT.TMVA.Factory(
    'TMVAClassification',
    outputFile,
    ':'.join([
        '!V',
        '!Silent',
        'Color',
        'DrawProgressBar',
        'Transformations=I,G',
        'AnalysisType=Classification',
    ]))
# ## Define model for Keras
# In[5]:
# Define model: two 32-unit ReLU layers, each followed by dropout of 0.5,
# and a 2-unit softmax output. The input width is the number of variables
# registered with the dataloader.
model = Sequential([
    Dense(32, init='glorot_normal', activation='relu',
          input_dim=numVariables),
    Dropout(0.5),
    Dense(32, init='glorot_normal', activation='relu'),
    Dropout(0.5),
    Dense(2, init='glorot_uniform', activation='softmax'),
])
# Set loss and optimizer
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
# Store model to file; the PyKeras method is booked with FilenameModel=model.h5
model.save('model.h5')
# Print summary of model
model.summary()
# ## Book methods
#
# Just run the cells that contain the classifiers you want to try.
# In[6]:
# Keras interface with previously defined model (referenced by file name).
# Options are joined with ':' into a single option string.
factory.BookMethod(
    dataloader,
    ROOT.TMVA.Types.kPyKeras,
    'PyKeras',
    ':'.join([
        'H',
        '!V',
        'VarTransform=G',
        'FilenameModel=model.h5',
        'NumEpochs=10',
        'BatchSize=32',
        'TriesEarlyStopping=3',
    ]))
# In[7]:
# Gradient tree boosting from scikit-learn package.
# Adjacent string literals are concatenated into one option string.
factory.BookMethod(
    dataloader,
    ROOT.TMVA.Types.kPyGTB,
    'GTB',
    'H:!V:VarTransform=None:'
    'NEstimators=100:LearningRate=0.1:MaxDepth=3',
)
# ## Run training, testing and evaluation
# In[8]:
# Train all methods booked on the factory
factory.TrainAllMethods()
# In[9]:
# Test the trained methods (must run after training)
factory.TestAllMethods()
# In[10]:
# Evaluate the methods; the ROC curve drawn afterwards is taken from the factory
factory.EvaluateAllMethods()
# ## Print ROC
# In[11]:
# Enable Javascript for ROOT so that we can draw the canvas.
# get_ipython is only defined inside an IPython/Jupyter session. When this
# file is executed as a plain Python script the name does not exist, so the
# magic is skipped instead of aborting with a NameError after training.
try:
    ipython = get_ipython()
except NameError:
    ipython = None
if ipython is not None:
    ipython.run_line_magic('jsroot', 'on')
# Plot the ROC curves of the booked methods
canvas = factory.GetROCCurve(dataloader)
canvas.Draw()