#!/usr/bin/env python # coding: utf-8 #
#
# #
#
# #
#
# # User interface #
#!/usr/bin/env python
# coding: utf-8

# # User interface

# In[1]:
import ROOT
from ROOT import TFile, TMVA, TCut

# ## Enable JS visualization
# To use the new interactive features in the notebook we have to enable a
# module called JsMVA. This is done with the ipython magic %jsmva.

# In[2]:
get_ipython().run_line_magic('jsmva', 'on')

# ## Declaration of Factory
# First the classical (C++-like) declaration: pass a job-name string, an
# opened output TFile (optional; when present it is used to store the output
# histograms), and one string holding all Factory settings separated by ':'.

# ### C++ like declaration

# In[3]:
outputFile = TFile("TMVA.root", 'RECREATE')
TMVA.Tools.Instance()
factory = TMVA.Factory("TMVAClassification",
                       outputFile,  # this argument is optional
                       "!V:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

# The options string can contain the following options:
OptionDefaultPredefined valuesDescription
VFalse-Verbose flag
ColorTrue-Flag for colored output
Transformations""-List of transformations to test. For example with "I;D;P;U;G" string identity, decorrelation, PCA, uniform and Gaussian transformations will be applied
SilentFalse-Batch mode: boolean silent flag inhibiting # any output from TMVA after # the creation of the factory class object
DrawProgressBarTrue-Draw progress bar to display training, # testing and evaluation schedule (default: # True)
AnalysisTypeAutoClassification, # Regression, # Multiclass, AutoSet the analysis type
# ### Pythonic version
# With JsMVA enabled there are new, more readable ways to do the declaration.

# #### First version
# Instead of passing the options as one long string, pass each option
# separately as a named argument:

# In[4]:
factory = TMVA.Factory("TMVAClassification", outputFile,
                       V=False, Color=True, Silent=True, DrawProgressBar=True,
                       Transformations="I;D;P;G,D",
                       AnalysisType="Classification")

# The Transformations option above is the string "I;D;P;G,D". Instead of a
# string, these options can also be passed as a list: ["I", "D", "P", "G", "D"]

# #### Second version
# The first version only changed how the options are passed; the first two
# arguments were still positional. They can be named as well: the first
# parameter is JobName and the second is TargetFile.

# In[5]:
factory = TMVA.Factory(JobName="TMVAClassification", TargetFile=outputFile,
                       V=False, Color=True, DrawProgressBar=True,
                       Transformations=["I", "D", "P", "G", "D"],
                       AnalysisType="Classification")

# Arguments of constructor:
# The options string can contain the following options:
KeywordCan be used as positional argumentDefaultPredefined valuesDescription
JobNameyes, 1.not optional-Name of job
TargetFileyes, 2.if not passed histograms won't be saved-File to write control and performance histograms
VnoFalse-Verbose flag
ColornoTrue-Flag for colored output
Transformationsno""-List of transformations to test. For example with "I;D;P;U;G" string identity, decorrelation, PCA, uniform and Gaussian transformations will be applied
SilentnoFalse-Batch mode: boolean silent flag inhibiting # any output from TMVA after # the creation of the factory class object
DrawProgressBarnoTrue-Draw progress bar to display training, # testing and evaluation schedule (default: # True)
AnalysisTypenoAutoClassification, # Regression, # Multiclass, AutoSet the analysis type
# ## Declaring the DataLoader, adding variables and setting up the dataset
# First declare a DataLoader and register the variables (the variable names
# used in the test and train trees of the input dataset) via AddVariable:
#
# 1. string with the variable name; using ":=" a definition can be added too,
# 2. string (label for the variable; the name is used if absent) or a
#    character defining the type of the data points,
# 3. when a label is given, the data-point type can still come third.

# In[6]:
dataset = "tmva_class_example"  # the dataset name
loader = TMVA.DataLoader(dataset)

loader.AddVariable("myvar1 := var1+var2", 'F')
loader.AddVariable("myvar2 := var1-var2", "Expression 2", 'F')
loader.AddVariable("var3", "Variable 3", 'F')
loader.AddVariable("var4", "Variable 4", 'F')

# Spectator variables are part of the input data set but are not used in the
# MVA training, testing or evaluation; they can be used e.g. for correlation
# tests. Parameters:
#
# 1. string with the definition of the spectator variable,
# 2. label for the spectator variable,
# 3. data type.

# In[7]:
loader.AddSpectator("spec1:=var1*2", "Spectator 1", 'F')
loader.AddSpectator("spec2:=var1*3", "Spectator 2", 'F')

# After adding the variables we add the data to the DataLoader: if the
# dataset file does not exist in the files directory we download it from
# CERN's server; once we have the ROOT file we open it and get the signal
# and background trees.
# In[8]:
# Download the example dataset on first use, then open it and fetch the
# signal and background trees.
# NOTE: TSystem::AccessPathName() returns a non-zero (true) value when the
# path is NOT accessible — hence the `!= 0` check before downloading.
if ROOT.gSystem.AccessPathName("tmva_class_example.root") != 0:
    ROOT.gSystem.Exec("wget https://root.cern.ch/files/tmva_class_example.root")

# Renamed from `input` so the Python builtin of the same name is not shadowed.
inputFile = TFile.Open("tmva_class_example.root")

# Get the signal and background trees for training
signal = inputFile.Get("TreeS")
background = inputFile.Get("TreeB")

# Pass the trees to the DataLoader with AddSignalTree/AddBackgroundTree and
# mirror them on the corresponding DataLoader attributes. Arguments:
#
# 1. signal/background tree,
# 2. global weight applied to all events in the tree.

# In[9]:
# Global event weights (see below for setting event-wise weights)
signalWeight = 1.0
backgroundWeight = 1.0

loader.AddSignalTree(signal, signalWeight)
loader.AddBackgroundTree(background, backgroundWeight)

# JsMVA reads these DataLoader attributes directly, so keep them in sync
# with the registered trees and weights.
loader.fSignalWeight = signalWeight
loader.fBackgroundWeight = backgroundWeight
loader.fTreeS = signal
loader.fTreeB = background

# DataLoader.PrepareTrainingAndTestTree applies cuts on the input events. In
# C++ its options go in one option string (as seen for the Factory
# constructor); with JsMVA they can be passed as keyword arguments instead.
#
# Arguments of PrepareTrainingAndTestTree:
KeywordCan be used as positional argumentDefaultPredefined valuesDescription
SigCutyes, 1.--TCut object for signal cut
Bkgyes, 2.--TCut object for background cut
SplitModenoRandomRandom, # Alternate, # BlockMethod of picking training and testing # events
MixModenoSameAsSplitModeSameAsSplitMode, # Random, # Alternate, # BlockMethod of mixing events of different # classes into one dataset
SplitSeedno100-Seed for random event shuffling
NormModenoEqualNumEventsNone, NumEvents, # EqualNumEventsOverall renormalisation of event-by-event # weights used in the training (NumEvents: # average weight of 1 per # event, independently for signal and # background; EqualNumEvents: average # weight of 1 per event for signal, # and sum of weights for background # equal to sum of weights for signal)
nTrain_Signalno0 (all)-Number of training events of class Signal
nTest_Signalno0 (all)-Number of test events of class Signal
nTrain_Backgroundno0 (all)-Number of training events of class # Background
nTest_Background no0 (all)-Number of test events of class Background
VnoFalse-Verbosity
VerboseLevelnoInfoDebug, Verbose, # InfoVerbosity level
# In[10]:
# No event selection here: both signal and background use an empty TCut.
mycuts = TCut("")
mycutb = TCut("")

loader.PrepareTrainingAndTestTree(SigCut=mycuts, BkgCut=mycutb,
                                  nTrain_Signal=0, nTrain_Background=0,
                                  SplitMode="Random", NormMode="NumEvents",
                                  V=False)

# ## Booking methods
# To choose what we want to train on the dataset we use the
# Factory.BookMethod function; it registers a method and its options with the
# Factory.
#
# Arguments:
KeywordCan be used as positional argumentDefaultPredefined valuesDescription
DataLoaderyes, 1.--Pointer to DataLoader object
Methodyes, 2.- kVariable # kCuts , # kLikelihood , # kPDERS , # kHMatrix , # kFisher , # kKNN , # kCFMlpANN , # kTMlpANN , # kBDT , # kDT , # kRuleFit , # kSVM , # kMLP , # kBayesClassifier, # kFDA , # kBoost , # kPDEFoam , # kLD , # kPlugins , # kCategory , # kDNN , # kPyRandomForest , # kPyAdaBoost , # kPyGTB , # kC50 , # kRSNNS , # kRSVM , # kRXGB , # kMaxMethodSelected method number, method numbers defined in TMVA.Types
MethodTitleyes, 3.--Label for method
* no -- Other named arguments which are the options for selected method.
# In[11]:
# Book a multilayer perceptron (MLP) neural network on the loaded dataset.
factory.BookMethod(DataLoader=loader, Method=TMVA.Types.kMLP,
                   MethodTitle="MLP",
                   H=False, V=False, NeuronType="tanh", VarTransform="N",
                   NCycles=600, HiddenLayers="N+5", TestRate=5,
                   UseRegulator=False)

# ## Evaluate importance
# To calculate the variable importance we can use the
# Factory.EvaluateImportance function. Its parameters are the following:
KeywordCan be used as positional argumentDefaultPredefined valuesDescription
DataLoaderyes, 1.--Pointer to DataLoader object
VITypeyes, 2.--Variable Importance type
Methodyes, 3.- kVariable # kCuts , # kLikelihood , # kPDERS , # kHMatrix , # kFisher , # kKNN , # kCFMlpANN , # kTMlpANN , # kBDT , # kDT , # kRuleFit , # kSVM , # kMLP , # kBayesClassifier, # kFDA , # kBoost , # kPDEFoam , # kLD , # kPlugins , # kCategory , # kDNN , # kPyRandomForest , # kPyAdaBoost , # kPyGTB , # kC50 , # kRSNNS , # kRSVM , # kRXGB , # kMaxMethodSelected method number, method numbers defined in TMVA.Types
MethodTitleyes, 4.--Label for method
VnoFalse-Verbose
NTreesnoNTrees
MinNodeSizenoMinNodeSize
MaxDepthnoMaxDepth
BoostTypenoBoostType
AdaBoostBetanoAdaBoostBeta
UseBaggedBoostnoUseBaggedBoost
BaggedSampleFractionno
SeparationTypeno
nCutsnonCuts
# In[12]:
# Run the variable-importance evaluation with a small BDT (5 trees).
factory.EvaluateImportance(DataLoader=loader, VIType=0,
                           Method=TMVA.Types.kBDT, MethodTitle="BDT",
                           V=False, NTrees=5, MinNodeSize="2.5%", MaxDepth=2,
                           BoostType="AdaBoost", AdaBoostBeta=0.5,
                           UseBaggedBoost=True, BaggedSampleFraction=0.5,
                           SeparationType="GiniIndex", nCuts=20)

# In[ ]: