StandardFrequentistDiscovery
This is a standard demo that can be used with any ROOT file prepared in the standard way. You specify:
With default parameters the macro will attempt to run the standard hist2workspace example and read the ROOT file that it produces.
Author: Sven Kreiss, Kyle Cranmer
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Wednesday, April 17, 2024 at 11:21 AM.
%%cpp -d
#include "TFile.h"
#include "TROOT.h"
#include "TH1F.h"
#include "TF1.h"
#include "TCanvas.h"
#include "TStopwatch.h"
#include "RooWorkspace.h"
#include "RooAbsData.h"
#include "RooRandom.h"
#include "RooRealSumPdf.h"
#include "RooNumIntConfig.h"
#include "RooStats/ModelConfig.h"
#include "RooStats/ToyMCImportanceSampler.h"
#include "RooStats/HypoTestResult.h"
#include "RooStats/HypoTestPlot.h"
#include "RooStats/SamplingDistribution.h"
#include "RooStats/ProfileLikelihoodTestStat.h"
#include "RooStats/SimpleLikelihoodRatioTestStat.h"
#include "RooStats/ProfileLikelihoodCalculator.h"
#include "RooStats/LikelihoodInterval.h"
#include "RooStats/LikelihoodIntervalPlot.h"
#include "RooStats/FrequentistCalculator.h"
#include "TSystem.h"
#include <vector>
using namespace RooFit;
using namespace RooStats;
Arguments are defined.
const char *infile = "";
const char *workspaceName = "channel1";
const char *modelConfigNameSB = "ModelConfig";
const char *dataName = "obsData";
int toys = 1000;
double poiValueForBackground = 0.0;
double poiValueForSignal = 1.0;
The workspace contains the model for s+b. The b model is "autogenerated" by copying s+b and setting the one parameter of interest to zero. To keep the script simple, multiple parameters of interest or different functional forms of the b model are not supported.
for now, assume there is only one parameter of interest, and these are its values:
First part is just to access a user-defined file or create the standard example file if it doesn't exist
const char *filename = "";
if (!strcmp(infile, "")) {
filename = "results/example_channel1_GammaExample_model.root";
bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
// if file does not exists generate with histfactory
if (!fileExist) {
// Normally this would be run on the command line
cout << "will run standard hist2workspace example" << endl;
gROOT->ProcessLine(".! prepareHistFactory .");
gROOT->ProcessLine(".! hist2workspace config/example.xml");
cout << "\n\n---------------------" << endl;
cout << "Done creating example input" << endl;
cout << "---------------------\n\n" << endl;
}
} else
filename = infile;
Try to open the file
TFile *file = TFile::Open(filename);
if input file was specified byt not found, quit
if (!file) {
cout << "StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
return -1;
}
TStopwatch *mn_t = new TStopwatch;
mn_t->Start();
get the workspace out of the file
RooWorkspace *w = (RooWorkspace *)file->Get(workspaceName);
if (!w) {
cout << "workspace not found" << endl;
return -1.0;
}
get the modelConfig out of the file
ModelConfig *mc = (ModelConfig *)w->obj(modelConfigNameSB);
get the data out of the file
RooAbsData *data = w->data(dataName);
make sure ingredients are found
if (!data || !mc) {
w->Print();
cout << "data or ModelConfig was not found" << endl;
return -1.0;
}
RooRealVar *firstPOI = (RooRealVar *)mc->GetParametersOfInterest()->first();
firstPOI->setVal(poiValueForSignal);
mc->SetSnapshot(*mc->GetParametersOfInterest());
create null model
ModelConfig *mcNull = mc->Clone("ModelConfigNull");
firstPOI->setVal(poiValueForBackground);
mcNull->SetSnapshot(*(RooArgSet *)mcNull->GetParametersOfInterest()->snapshot());
Configure a ProfileLikelihoodTestStat and a SimpleLikelihoodRatioTestStat to use simultaneously with ToyMCSampler
ProfileLikelihoodTestStat *plts = new ProfileLikelihoodTestStat(*mc->GetPdf());
plts->SetOneSidedDiscovery(true);
plts->SetVarName("q_{0}/2");
configure the ToyMCImportanceSampler with two test statistics
ToyMCSampler toymcs(*plts, 50);
Since this tool needs to throw toy MC the PDF needs to be extended or the tool needs to know how many entries in a dataset per pseudo experiment. In the 'number counting form' where the entries in the dataset are counts, and not values of discriminating variables, the datasets typically only have one entry and the PDF is not extended.
if (!mc->GetPdf()->canBeExtended()) {
if (data->numEntries() == 1) {
toymcs.SetNEventsPerToy(1);
} else
cout << "Not sure what to do about this model" << endl;
}
We can use PROOF to speed things along in parallel ProofConfig pc(*w, 2, "user@yourfavoriteproofcluster", false);
ProofConfig pc(*w, 2, "", false);
toymcs.SetProofConfig(&pc); // enable proof
instantiate the calculator
FrequentistCalculator freqCalc(*data, *mc, *mcNull, &toymcs);
freqCalc.SetToys(toys, toys); // null toys, alt toys
Run the calculator and print result
HypoTestResult *freqCalcResult = freqCalc.GetHypoTest();
freqCalcResult->GetNullDistribution()->SetTitle("b only");
freqCalcResult->GetAltDistribution()->SetTitle("s+b");
freqCalcResult->Print();
double pvalue = freqCalcResult->NullPValue();
stop timing
mn_t->Stop();
cout << "total CPU time: " << mn_t->CpuTime() << endl;
cout << "total real time: " << mn_t->RealTime() << endl;
plot
TCanvas *c1 = new TCanvas();
HypoTestPlot *plot = new HypoTestPlot(*freqCalcResult, 100, -0.49, 9.51);
plot->SetLogYaxis(true);
add chi2 to plot
int nPOI = 1;
TF1 *f = new TF1("f", TString::Format("1*ROOT::Math::chisquared_pdf(2*x,%d,0)", nPOI), 0, 20);
f->SetLineColor(kBlack);
f->SetLineStyle(7);
plot->AddTF1(f, TString::Format("#chi^{2}(2x,%d)", nPOI));
plot->Draw();
c1->SaveAs("standard_discovery_output.pdf");
return pvalue;
Draw all canvases
gROOT->GetListOfCanvases()->Draw()