Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More examples, combine support, BDT/MVA support #368

Merged
merged 2 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions addons/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
add_subdirectory(FastJet)
add_subdirectory(ONNXRuntime)
add_subdirectory(TMVAHelper)

set(ADDONS_LIBRARIES ${ADDONS_LIBRARIES} PARENT_SCOPE)
message(STATUS "add-ons--------------------------- ${ADDONS_LIBRARIES}")
37 changes: 37 additions & 0 deletions addons/TMVAHelper/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@


find_package(TBB REQUIRED tbb)
find_package(ROOT)
find_package(ROOT COMPONENTS ROOTVecOps)
find_package(ROOT COMPONENTS TMVA)


message(STATUS "includes-------------------------- TEST: ${TBB_INCLUDE_DIRS}")

file(GLOB sources src/*.cc)
file(GLOB headers *.h)


fccanalyses_addon_build(TMVAHelper
SOURCES ${headers} ${sources}
# EXT_LIBS ROOT::ROOTVecOps ROOT::TMVA TBB::tbb
INSTALL_COMPONENT tmvahelper)

add_custom_command(TARGET TMVAHelper POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/python/*
${CMAKE_CURRENT_BINARY_DIR}
)

target_link_libraries(TMVAHelper PRIVATE TBB::tbb)
target_link_libraries(TMVAHelper PRIVATE ROOT::ROOTVecOps)
target_link_libraries(TMVAHelper PRIVATE ROOT::TMVA)
target_compile_features(TMVAHelper PRIVATE cxx_std_11)

install(FILES
${CMAKE_CURRENT_LIST_DIR}/TMVAHelper.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/TMVAHelper
)

file(GLOB _addon_python_files python/*.py)
install(FILES ${_addon_python_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/python/addons/TMVAHelper)
22 changes: 22 additions & 0 deletions addons/TMVAHelper/TMVAHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef TMVAHelper_TMVAHelper_h
#define TMVAHelper_TMVAHelper_h

#include <tbb/task_arena.h>
#include "ROOT/RVec.hxx"
#include "TMVA/RBDT.hxx"


class tmva_helper_xgb {
public:
explicit tmva_helper_xgb(const std::string &filename, const std::string &name, const unsigned &nvars, const unsigned int nslots = 1);
virtual ~tmva_helper_xgb();
ROOT::VecOps::RVec<float> operator()(const ROOT::VecOps::RVec<float> vars);

private:
unsigned int nvars_;
TMVA::Experimental::RBDT<> model_;
std::vector<TMVA::Experimental::RBDT<>> interpreters_;
};


#endif
39 changes: 39 additions & 0 deletions addons/TMVAHelper/python/TMVAHelper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

import ROOT
import pathlib

ROOT.gInterpreter.ProcessLine('#include "TMVAHelper/TMVAHelper.h"')
ROOT.gSystem.Load("libTMVAHelper.so")

class TMVAHelperXGB():

def __init__(self, model_input, model_name, variables=[]):

if len(variables) == 0: # try to get the variables from the model file (saved as a TList)
fIn = ROOT.TFile(model_input)
variables_ = fIn.Get("variables")
self.variables = [str(var.GetString()) for var in variables_]
fIn.Close()
else:
self.variables = variables
self.nvars = len(self.variables)
self.model_input = model_input
self.model_name = model_name
self.nthreads = ROOT.GetThreadPoolSize()

self.tmva_helper = ROOT.tmva_helper_xgb(self.model_input, self.model_name, self.nvars, self.nthreads)
self.var_col = f"tmva_vars_{self.model_name}"

def run_inference(self, df, col_name = "mva_score"):

# check if columns exist in the dataframe
cols = df.GetColumnNames()
for var in self.variables:
if not var in cols:
raise Exception(f"Variable {var} not defined in dataframe.")

vars_str = ', (float)'.join(self.variables)
df = df.Define(self.var_col, f"ROOT::VecOps::RVec<float>{{{vars_str}}}")
df = df.Define(col_name, self.tmva_helper, [self.var_col])
return df

23 changes: 23 additions & 0 deletions addons/TMVAHelper/src/TMVAHelper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include "TMVAHelper/TMVAHelper.h"


tmva_helper_xgb::tmva_helper_xgb(const std::string &filename, const std::string &name, const unsigned &nvars, const unsigned int nslots) :
model_(name, filename), nvars_(nvars) {

const unsigned int nslots_actual = std::max(nslots, 1U);
interpreters_.reserve(nslots_actual);
for (unsigned int islot = 0; islot < nslots_actual; ++islot) {
interpreters_.emplace_back(model_);
}
}

ROOT::VecOps::RVec<float> tmva_helper_xgb::operator()(const ROOT::VecOps::RVec<float> vars) {
auto const tbb_slot = std::max(tbb::this_task_arena::current_thread_index(), 0);
if (tbb_slot >= interpreters_.size()) {
throw std::runtime_error("Not enough interpreters allocated for number of tbb threads");
}
auto &interpreter_data = interpreters_[tbb_slot];
return interpreter_data.Compute(vars);
}

tmva_helper_xgb::~tmva_helper_xgb() {}
3 changes: 3 additions & 0 deletions bin/fccanalysis
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ from pin_analysis import PinAnalysis
from run_analysis import run
from run_final_analysis import run_final
from do_plots import do_plots
from do_combine import do_combine


class MultiLineFormatter(logging.Formatter):
Expand Down Expand Up @@ -94,6 +95,8 @@ def main():
run_final(parser)
elif args.command == 'plots':
do_plots(parser)
elif args.command == 'combine':
do_combine(parser)
else:
run(parser)

Expand Down
81 changes: 81 additions & 0 deletions examples/FCCee/higgs/jetclustering/functions.h
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can those functions be moved to the analyzers/dataframe ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do a major port of all the C++ functions in a subsequent PR (will add some more functions as well).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#ifndef ZHfunctions_H
#define ZHfunctions_H

#include <cmath>
#include <vector>
#include <math.h>

#include "TLorentzVector.h"
#include "ROOT/RVec.hxx"
#include "edm4hep/ReconstructedParticleData.h"
#include "edm4hep/MCParticleData.h"
#include "edm4hep/ParticleIDData.h"
#include "ReconstructedParticle2MC.h"


namespace FCCAnalyses {

// make Lorentzvectors from pseudojets
Vec_tlv makeLorentzVectors(Vec_f jets_px, Vec_f jets_py, Vec_f jets_pz, Vec_f jets_e) {
Vec_tlv result;
for(int i=0; i<jets_px.size(); i++) {
TLorentzVector tlv;
tlv.SetPxPyPzE(jets_px[i], jets_py[i], jets_pz[i], jets_e[i]);
result.push_back(tlv);
}
return result;
}

Vec_i jetTruthFinder(std::vector<std::vector<int>> constituents, Vec_rp reco, Vec_mc mc, Vec_i mcind, bool findGluons = false) {
// jet truth=finder: match the gen-level partons (eventually with gluons) with the jet constituents
// matching by mimimizing the sum of dr of the parton and all the jet constituents

Vec_tlv genQuarks; // Lorentz-vector of potential partons (gen-level)
Vec_i genQuarks_pdgId; // corresponding PDG ID
for(size_t i = 0; i < mc.size(); ++i) {
int pdgid = abs(mc.at(i).PDG);
if(pdgid > 6 and not findGluons) continue; // only quarks
if(pdgid > 6 and pdgid != 21 and findGluons) continue; // only quarks and gluons
TLorentzVector tlv;
tlv.SetXYZM(mc.at(i).momentum.x,mc.at(i).momentum.y,mc.at(i).momentum.z,mc.at(i).mass);
genQuarks.push_back(tlv);
genQuarks_pdgId.push_back(mc.at(i).PDG);
}

Vec_tlv recoParticles; // Lorentz-vector of all reconstructed particles
for(size_t i = 0; i < reco.size(); ++i) {
auto & p = reco[i];
TLorentzVector tlv;
tlv.SetXYZM(p.momentum.x, p.momentum.y, p.momentum.z, p.mass);
recoParticles.push_back(tlv);
}

Vec_i usedIdx;
Vec_i result;
for(size_t iJet = 0; iJet < constituents.size(); ++iJet) {
Vec_d dr;
for(size_t iGen = 0; iGen < genQuarks.size(); ++iGen) {
if(std::find(usedIdx.begin(), usedIdx.end(), iGen) != usedIdx.end()) {
dr.push_back(1e99); // set infinite dr, skip
continue;
}
dr.push_back(0);
for(size_t i = 0; i < constituents[iJet].size(); ++i) {
dr[iGen] += recoParticles[constituents[iJet][i]].DeltaR(genQuarks[iGen]);
}
}
int maxDrIdx = std::min_element(dr.begin(),dr.end()) - dr.begin();
usedIdx.push_back(maxDrIdx);
result.push_back(genQuarks_pdgId[maxDrIdx]);

}
return result;
}





}

#endif
103 changes: 103 additions & 0 deletions examples/FCCee/higgs/jetclustering/histmaker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@

processList = {
'wzp6_ee_mumuH_Hbb_ecm240': {'fraction': 1},
}

# Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics (mandatory)
prodTag = "FCCee/winter2023/IDEA/"

# Link to the dictonary that contains all the cross section informations etc... (mandatory)
procDict = "FCCee_procDict_winter2023_IDEA.json"

# additional/custom C++ functions, defined in header files (optional)
includePaths = ["functions.h"]

#Optional: output directory, default is local running directory
outputDir = f"outputs/FCCee/higgs/jetclustering/histmaker/"

# optional: ncpus, default is 4, -1 uses all cores available
nCPUS = -1

# scale the histograms with the cross-section and integrated luminosity
doScale = True
intLumi = 7200000.0 # 7.2 /ab


# define some binning for various histograms
bins_p_mu = (250, 0, 250)
bins_m_ll = (250, 0, 250)
bins_p_ll = (250, 0, 250)
bins_recoil = (200, 120, 140)
bins_pdgid = (51, -25.5, 25.5)
bins_dijet_m = (80, 70, 150)


# build_graph function that contains the analysis logic, cuts and histograms (mandatory)
def build_graph(df, dataset):

results = []
df = df.Define("weight", "1.0")
weightsum = df.Sum("weight")

df = df.Alias("Particle0", "Particle#0.index")
df = df.Alias("Particle1", "Particle#1.index")
df = df.Alias("MCRecoAssociations0", "MCRecoAssociations#0.index")
df = df.Alias("MCRecoAssociations1", "MCRecoAssociations#1.index")

# select muons from Z decay and form Z/recoil mass
df = df.Alias("Muon0", "Muon#0.index")
df = df.Define("muons_all", "FCCAnalyses::ReconstructedParticle::get(Muon0, ReconstructedParticles)")
df = df.Define("muons", "FCCAnalyses::ReconstructedParticle::sel_p(25)(muons_all)")
df = df.Define("muons_p", "FCCAnalyses::ReconstructedParticle::get_p(muons)")
df = df.Define("muons_no", "FCCAnalyses::ReconstructedParticle::get_n(muons)")
df = df.Filter("muons_no >= 2")

df = df.Define("zmumu", "ReconstructedParticle::resonanceBuilder(91)(muons)")
df = df.Define("zmumu_m", "ReconstructedParticle::get_mass(zmumu)[0]")
df = df.Define("zmumu_p", "ReconstructedParticle::get_p(zmumu)[0]")
df = df.Define("zmumu_recoil", "ReconstructedParticle::recoilBuilder(240)(zmumu)")
df = df.Define("zmumu_recoil_m", "ReconstructedParticle::get_mass(zmumu_recoil)[0]")

# basic selection
df = df.Filter("zmumu_m > 70 && zmumu_m < 100")
df = df.Filter("zmumu_p > 20 && zmumu_p < 70")
df = df.Filter("zmumu_recoil_m < 140 && zmumu_recoil_m > 120")


# do jet clustering on all particles, except the muons
df = df.Define("rps_no_muons", "FCCAnalyses::ReconstructedParticle::remove(ReconstructedParticles, muons)")
df = df.Define("RP_px", "FCCAnalyses::ReconstructedParticle::get_px(rps_no_muons)")
df = df.Define("RP_py", "FCCAnalyses::ReconstructedParticle::get_py(rps_no_muons)")
df = df.Define("RP_pz","FCCAnalyses::ReconstructedParticle::get_pz(rps_no_muons)")
df = df.Define("RP_e", "FCCAnalyses::ReconstructedParticle::get_e(rps_no_muons)")
df = df.Define("pseudo_jets", "FCCAnalyses::JetClusteringUtils::set_pseudoJets(RP_px, RP_py, RP_pz, RP_e)")


# Implemented algorithms and arguments: https://github.com/HEP-FCC/FCCAnalyses/blob/master/addons/FastJet/JetClustering.h
# More info: https://indico.cern.ch/event/1173562/contributions/4929025/attachments/2470068/4237859/2022-06-FCC-jets.pdf
df = df.Define("clustered_jets", "JetClustering::clustering_ee_kt(2, 2, 0, 10)(pseudo_jets)") # 2-jet exclusive clustering

df = df.Define("jets", "FCCAnalyses::JetClusteringUtils::get_pseudoJets(clustered_jets)")
df = df.Define("jetconstituents", "FCCAnalyses::JetClusteringUtils::get_constituents(clustered_jets)") # one-to-one mapping to the input collection (rps_no_muons)
df = df.Define("jets_e", "FCCAnalyses::JetClusteringUtils::get_e(jets)")
df = df.Define("jets_px", "FCCAnalyses::JetClusteringUtils::get_px(jets)")
df = df.Define("jets_py", "FCCAnalyses::JetClusteringUtils::get_py(jets)")
df = df.Define("jets_pz", "FCCAnalyses::JetClusteringUtils::get_pz(jets)")
df = df.Define("jets_phi", "FCCAnalyses::JetClusteringUtils::get_phi(jets)")
df = df.Define("jets_m", "FCCAnalyses::JetClusteringUtils::get_m(jets)")

# convert jets to LorentzVectors
df = df.Define("jets_tlv", "FCCAnalyses::makeLorentzVectors(jets_px, jets_py, jets_pz, jets_e)")
df = df.Define("jets_truth", "FCCAnalyses::jetTruthFinder(jetconstituents, rps_no_muons, Particle, MCRecoAssociations1)") # returns best-matched PDG ID of the jets
df = df.Define("dijet_higgs_m", "(jets_tlv[0]+jets_tlv[1]).M()")

# define histograms
results.append(df.Histo1D(("zmumu_m", "", *bins_m_ll), "zmumu_m"))
results.append(df.Histo1D(("zmumu_p", "", *bins_p_ll), "zmumu_p"))
results.append(df.Histo1D(("zmumu_recoil_m", "", *bins_recoil), "zmumu_recoil_m"))

results.append(df.Histo1D(("jets_truth", "", *bins_pdgid), "jets_truth"))
results.append(df.Histo1D(("dijet_higgs_m", "", *bins_dijet_m), "dijet_higgs_m"))

return results, weightsum

Loading
Loading