Notebook

df106_HiggsToFourLeptons¶

The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.

This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020 (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads to a narrow invariant mass peak on top a relatively smooth and small background, revealing the Higgs at 125 GeV. The analysis is translated to an RDataFrame workflow processing about 300 MB of simulated events and data.

Lepton selection efficiency corrections ("scale factors") are applied to simulated samples to correct for the differences in the trigger, reconstruction, and identification efficiencies in simulation compared to real data. Systematic uncertainties for those scale factors are evaluated and the Vary function of RDataFrame is used to propagate the variations to the final four leptons mass distribution.

See the corresponding spec json file.

Author: Stefan Wunsch (KIT, CERN), Julia Mathe (CERN), Marta Czurylo (CERN)
This notebook tutorial was automatically generated with ROOTBOOK-izer from the macro found in the ROOT repository on Wednesday, April 17, 2024 at 11:08 AM.

In [ ]:

%%cpp -d
#include <Math/Vector4D.h>
#include <ROOT/RDFHelpers.hxx>
#include <ROOT/RDataFrame.hxx>
#include <ROOT/RVec.hxx>
#include <TCanvas.h>
#include <TGraph.h>
#include <TH1D.h>
#include <THStack.h>
#include <TLatex.h>
#include <TLegend.h>
#include <TProfile.h>
#include <TStyle.h>

using namespace ROOT::VecOps;
using PtEtaPhiEVectorF = ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiE4D<float>>;
using ROOT::RVecF;
using ROOT::RDF::RSampleInfo;
using namespace ROOT::RDF::Experimental;

Define functions needed in the analysis Select events for the analysis

In [ ]:

%%cpp -d
bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,
                           const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0)
{
   for (size_t i = 0; i < type.size(); i++) {
      PtEtaPhiEVectorF p(0.001 * pt[i], eta[i], phi[i], 0.001 * e[i]);
      if (type[i] == 11) {
         if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 ||
             abs(z0[i] * sin(p.Theta())) > 0.5)
            return false;
      } else {
         if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5)
            return false;
      }
   }
   return true;
}

Compute the invariant mass of a four-lepton-system.

In [ ]:

%%cpp -d
float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e)
{
   PtEtaPhiEVectorF p1(pt[0], eta[0], phi[0], e[0]);
   PtEtaPhiEVectorF p2(pt[1], eta[1], phi[1], e[1]);
   PtEtaPhiEVectorF p3(pt[2], eta[2], phi[2], e[2]);
   PtEtaPhiEVectorF p4(pt[3], eta[3], phi[3], e[3]);
   return 0.001 * (p1 + p2 + p3 + p4).M();
}

Enable Multithreading

In [ ]:

ROOT::EnableImplicitMT();

Create the RDataFrame from the spec json file. The df106_HiggsToFourLeptons_spec.json is provided in the same folder as this tutorial

In [ ]:

std::string dataset_spec = gROOT->GetTutorialsDir() + std::string("/dataframe/df106_HiggsToFourLeptons_spec.json");
ROOT::RDataFrame df = ROOT::RDF::Experimental::FromSpec(dataset_spec);

Add the ProgressBar feature

In [ ]:

ROOT::RDF::Experimental::AddProgressBar(df);

Perform the analysis Access metadata information that is stored in the JSON config file of the RDataFrame The metadata contained in the JSON file is accessible within a DefinePerSample call, through the RSampleInfo class

In [ ]:

auto df_analysis =
   df.DefinePerSample("xsecs", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("xsecs"); })
      .DefinePerSample("lumi", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("lumi"); })
      .DefinePerSample("sumws", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("sumws"); })
      .DefinePerSample("sample_category",
                       [](unsigned int slot, const RSampleInfo &id) { return id.GetS("sample_category"); })

Apply an MC correction for the ZZ decay due to missing gg->ZZ process

In [ ]:

.DefinePerSample("scale",
                       [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
                          return id.Contains("mc_363490.llll.4lep.root") ? 1.3f : 1.0f;
                       })

Select electron or muon trigger

In [ ]:

.Filter("trigE || trigM")

Select events with exactly four good leptons conserving charge and lepton numbers Note that all collections are RVecs and good_lep is the mask for the good leptons. The lepton types are PDG numbers and set to 11 or 13 for an electron or muon irrespective of the charge.

In [ ]:

.Define("good_lep",
              "abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3")
      .Filter("Sum(good_lep) == 4")
      .Filter("Sum(lep_charge[good_lep]) == 0")
      .Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")
      .Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")

Apply additional cuts depending on lepton flavour

In [ ]:

.Filter(
         "GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], "
         "lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])")

Create new columns with the kinematics of good leptons

In [ ]:

.Define("goodlep_pt", "lep_pt[good_lep]")
      .Define("goodlep_eta", "lep_eta[good_lep]")
      .Define("goodlep_phi", "lep_phi[good_lep]")
      .Define("goodlep_E", "lep_E[good_lep]")
      .Define("goodlep_type", "lep_type[good_lep]")

Select leptons with high transverse momentum

In [ ]:

.Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")

Compute invariant mass

In [ ]:

.Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")

Reweighting of the samples is different for "data" and "MC"

In [ ]:

.DefinePerSample("reweighting", [](unsigned int slot, const RSampleInfo &id) { return id.Contains("mc"); });

Define the weight column (scale factor) for the MC samples

In [ ]:

auto df_mc = df_analysis.Filter("reweighting == true")
                .Define("weight", ("scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * "
                                   "scaleFactor_PILEUP * mcWeight * scale * xsecs / sumws * lumi"));

Book histograms for individual MC samples

In [ ]:

auto df_higgs = df_mc.Filter(R"(sample_category == "higgs")")
                   .Histo1D<float>(ROOT::RDF::TH1DModel("higgs", "m4l", 24, 80, 170), "m4l", "weight");
auto df_zz = df_mc.Filter("sample_category == \"zz\"")
                .Histo1D<float>(ROOT::RDF::TH1DModel("zz", "m4l", 24, 80, 170), "m4l", "weight");
auto df_other = df_mc.Filter("sample_category == \"other\"")
                   .Histo1D<float>(ROOT::RDF::TH1DModel("other", "m4l", 24, 80, 170), "m4l", "weight");

Book the invariant mass histogram for the data

In [ ]:

auto df_h_mass_data = df_analysis.Filter("reweighting == false")
                         .Filter("sample_category == \"data\"")
                         .Define("weight_", []() { return 1; })
                         .Histo1D<float>(ROOT::RDF::TH1DModel("data", "m4l", 24, 80, 170), "m4l", "weight_");

Evaluate the systematic uncertainty

The systematic uncertainty in this analysis is the MC scale factor uncertainty that depends on lepton kinematics such as pT or pseudorapidity. Muons uncertainties are negligible, as stated in https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/MUON Electrons uncertainties are evaluated based on the plots available in https://doi.org/10.48550/arXiv.1908.0 The uncertainties are linearly interpolated, using the TGraph::Eval() method, to cover a range of pT values covered by the analysis.

In [ ]:

const std::vector<double> x{5.50e3, 5.52e3, 12.54e3, 17.43e3, 22.40e3, 27.48e3, 30e3, 10000e3};
const std::vector<double> y{0.06628, 0.06395, 0.06396, 0.03372, 0.02441, 0.01403, 0, 0};
TGraph graph(x.size(), x.data(), y.data());

Use the Vary method to add the systematic variations to the total MC scale factor ("weight") of the analysis The input consists of the input column to be varied and the lambda function to compute the systematic variations. The new output columns contain the varied values of the input column.

In [ ]:

auto df_with_variations_mc =
   df_mc
      .Vary("weight",
            [&graph](double x, const RVecF &pt, const RVec<unsigned int> &type) {
               const auto v = Mean(Map(pt[type == 11], [&graph](auto p) { return graph.Eval(p); }));
               return RVec<double>{(1 + v) * x, (1 - v) * x};
            },
            {"weight", "goodlep_pt", "goodlep_type"}, {"up", "down"})
      .Histo1D<float>(ROOT::RDF::TH1DModel("Invariant Mass", "m4l", 24, 80, 170), "m4l", "weight");

Create the total MC scale factor histograms: "nominal", "weight:up" and "weight:down".

In [ ]:

auto histos_mc = VariationsFor(df_with_variations_mc);

Evaluate the total MC uncertainty based on the variations. Note, in this case the uncertainties are symmetric.

In [ ]:

for (unsigned int i = 0; i < histos_mc["nominal"].GetXaxis()->GetNbins(); i++) {
   histos_mc["nominal"].SetBinError(
      i, (histos_mc["weight:up"].GetBinContent(i) - histos_mc["nominal"].GetBinContent(i)));
}

Make the plot of the data, individual MC contributions and the total MC scale factor systematic variations.

In [ ]:

gROOT->SetStyle("ATLAS");

Create canvas with pad

In [ ]:

auto c = new TCanvas("c", " ", 600, 600);
auto pad = new TPad("upper_pad", "", 0, 0, 1, 1);
pad->SetTickx(0);
pad->SetTicky(0);
pad->Draw();
pad->cd();

Draw stack with MC contributions

In [ ]:

auto stack = new THStack("stack", "");
auto h_other = df_other.GetPtr();
h_other->SetFillColor(kViolet - 9);
stack->Add(h_other);
auto h_zz = df_zz.GetPtr();
h_zz->SetFillColor(kAzure - 9);
stack->Add(h_zz);
auto h_higgs = df_higgs.GetPtr();
h_higgs->SetFillColor(kRed + 2);
stack->Add(h_higgs);

stack->Draw("HIST");
stack->GetHistogram()->SetTitle("");
stack->GetHistogram()->GetXaxis()->SetLabelSize(0.035);
stack->GetHistogram()->GetXaxis()->SetTitleSize(0.045);
stack->GetHistogram()->GetXaxis()->SetTitleOffset(1.3);
stack->GetHistogram()->GetXaxis()->SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]");
stack->GetHistogram()->GetYaxis()->SetLabelSize(0.035);
stack->GetHistogram()->GetYaxis()->SetTitleSize(0.045);
stack->GetHistogram()->GetYaxis()->SetTitle("Events");
stack->SetMaximum(35);
stack->GetHistogram()->GetYaxis()->ChangeLabel(1, -1, 0);
stack->DrawClone(
   "HIST"); // DrawClone() method is necessary to draw a TObject in case the original object goes out of scope

Draw MC scale factor and variations

In [ ]:

histos_mc["nominal"].SetStats(false);
histos_mc["nominal"].SetFillColor(kBlack);
histos_mc["nominal"].SetFillStyle(3254);
histos_mc["nominal"].DrawClone("E2 sames");
histos_mc["weight:up"].SetLineColor(kGreen + 2);
histos_mc["weight:up"].SetStats(false);
histos_mc["weight:up"].DrawClone("HIST sames");
histos_mc["weight:down"].SetLineColor(kBlue + 2);
histos_mc["weight:down"].SetStats(false);
histos_mc["weight:down"].DrawClone("HIST sames");

Draw data histogram

In [ ]:

auto h_data = df_h_mass_data.GetPtr();
h_data->SetMarkerStyle(20);
h_data->SetMarkerSize(1.);
h_data->SetLineWidth(2);
h_data->SetLineColor(kBlack);
h_data->SetStats(false);
h_data->DrawClone("E sames");

Add legend

In [ ]:

TLegend legend(0.57, 0.65, 0.94, 0.94);
legend.SetTextFont(42);
legend.SetFillStyle(0);
legend.SetBorderSize(0);
legend.SetTextSize(0.025);
legend.SetTextAlign(32);
legend.AddEntry(h_data, "Data", "lep");
legend.AddEntry(h_higgs, "Higgs MC", "f");
legend.AddEntry(h_zz, "ZZ MC", "f");
legend.AddEntry(h_other, "Other MC", "f");
legend.AddEntry(&(histos_mc["weight:down"]), "Total MC Variations Down", "l");
legend.AddEntry(&(histos_mc["weight:up"]), "Total MC Variations Up", "l");
legend.AddEntry(&(histos_mc["nominal"]), "Total MC Uncertainty", "f");
legend.DrawClone("Same");

Add ATLAS label

In [ ]:

TLatex atlas_label;
atlas_label.SetTextFont(70);
atlas_label.SetTextSize(0.04);
atlas_label.DrawLatexNDC(0.19, 0.85, "ATLAS");
TLatex data_label;
data_label.SetTextFont(42);
data_label.DrawLatexNDC(0.19 + 0.13, 0.85, "Open Data");
TLatex header;
data_label.SetTextFont(42);
header.SetTextSize(0.035);
header.DrawLatexNDC(0.21, 0.8, "#sqrt{s} = 13 TeV, 10 fb^{-1}");

Save the plot.

In [ ]:

c->SaveAs("df106_HiggsToFourLeptons_cpp.png");
std::cout << "Saved figure to df106_HiggsToFourLeptons_cpp.png" << std::endl;

Draw all canvases

In [ ]:

gROOT->GetListOfCanvases()->Draw()