-
Notifications
You must be signed in to change notification settings - Fork 121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
More examples, combine support, BDT/MVA support #368
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
add_subdirectory(FastJet) | ||
add_subdirectory(ONNXRuntime) | ||
add_subdirectory(TMVAHelper) | ||
|
||
set(ADDONS_LIBRARIES ${ADDONS_LIBRARIES} PARENT_SCOPE) | ||
message(STATUS "add-ons--------------------------- ${ADDONS_LIBRARIES}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
|
||
|
||
find_package(TBB REQUIRED tbb) | ||
find_package(ROOT) | ||
find_package(ROOT COMPONENTS ROOTVecOps) | ||
find_package(ROOT COMPONENTS TMVA) | ||
|
||
|
||
message(STATUS "includes-------------------------- TEST: ${TBB_INCLUDE_DIRS}") | ||
|
||
file(GLOB sources src/*.cc) | ||
file(GLOB headers *.h) | ||
|
||
|
||
fccanalyses_addon_build(TMVAHelper | ||
SOURCES ${headers} ${sources} | ||
# EXT_LIBS ROOT::ROOTVecOps ROOT::TMVA TBB::tbb | ||
INSTALL_COMPONENT tmvahelper) | ||
|
||
add_custom_command(TARGET TMVAHelper POST_BUILD | ||
COMMAND ${CMAKE_COMMAND} -E copy | ||
${CMAKE_CURRENT_SOURCE_DIR}/python/* | ||
${CMAKE_CURRENT_BINARY_DIR} | ||
) | ||
|
||
target_link_libraries(TMVAHelper PRIVATE TBB::tbb) | ||
target_link_libraries(TMVAHelper PRIVATE ROOT::ROOTVecOps) | ||
target_link_libraries(TMVAHelper PRIVATE ROOT::TMVA) | ||
target_compile_features(TMVAHelper PRIVATE cxx_std_11) | ||
|
||
install(FILES | ||
${CMAKE_CURRENT_LIST_DIR}/TMVAHelper.h | ||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/TMVAHelper | ||
) | ||
|
||
file(GLOB _addon_python_files python/*.py) | ||
install(FILES ${_addon_python_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/python/addons/TMVAHelper) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#ifndef TMVAHelper_TMVAHelper_h | ||
#define TMVAHelper_TMVAHelper_h | ||
|
||
#include <tbb/task_arena.h> | ||
#include "ROOT/RVec.hxx" | ||
#include "TMVA/RBDT.hxx" | ||
|
||
|
||
class tmva_helper_xgb { | ||
public: | ||
explicit tmva_helper_xgb(const std::string &filename, const std::string &name, const unsigned &nvars, const unsigned int nslots = 1); | ||
virtual ~tmva_helper_xgb(); | ||
ROOT::VecOps::RVec<float> operator()(const ROOT::VecOps::RVec<float> vars); | ||
|
||
private: | ||
unsigned int nvars_; | ||
TMVA::Experimental::RBDT<> model_; | ||
std::vector<TMVA::Experimental::RBDT<>> interpreters_; | ||
}; | ||
|
||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
|
||
import ROOT | ||
import pathlib | ||
|
||
ROOT.gInterpreter.ProcessLine('#include "TMVAHelper/TMVAHelper.h"') | ||
ROOT.gSystem.Load("libTMVAHelper.so") | ||
|
||
class TMVAHelperXGB(): | ||
|
||
def __init__(self, model_input, model_name, variables=[]): | ||
|
||
if len(variables) == 0: # try to get the variables from the model file (saved as a TList) | ||
fIn = ROOT.TFile(model_input) | ||
variables_ = fIn.Get("variables") | ||
self.variables = [str(var.GetString()) for var in variables_] | ||
fIn.Close() | ||
else: | ||
self.variables = variables | ||
self.nvars = len(self.variables) | ||
self.model_input = model_input | ||
self.model_name = model_name | ||
self.nthreads = ROOT.GetThreadPoolSize() | ||
|
||
self.tmva_helper = ROOT.tmva_helper_xgb(self.model_input, self.model_name, self.nvars, self.nthreads) | ||
self.var_col = f"tmva_vars_{self.model_name}" | ||
|
||
def run_inference(self, df, col_name = "mva_score"): | ||
|
||
# check if columns exist in the dataframe | ||
cols = df.GetColumnNames() | ||
for var in self.variables: | ||
if not var in cols: | ||
raise Exception(f"Variable {var} not defined in dataframe.") | ||
|
||
vars_str = ', (float)'.join(self.variables) | ||
df = df.Define(self.var_col, f"ROOT::VecOps::RVec<float>{{{vars_str}}}") | ||
df = df.Define(col_name, self.tmva_helper, [self.var_col]) | ||
return df | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#include "TMVAHelper/TMVAHelper.h" | ||
|
||
|
||
tmva_helper_xgb::tmva_helper_xgb(const std::string &filename, const std::string &name, const unsigned &nvars, const unsigned int nslots) : | ||
model_(name, filename), nvars_(nvars) { | ||
|
||
const unsigned int nslots_actual = std::max(nslots, 1U); | ||
interpreters_.reserve(nslots_actual); | ||
for (unsigned int islot = 0; islot < nslots_actual; ++islot) { | ||
interpreters_.emplace_back(model_); | ||
} | ||
} | ||
|
||
ROOT::VecOps::RVec<float> tmva_helper_xgb::operator()(const ROOT::VecOps::RVec<float> vars) { | ||
auto const tbb_slot = std::max(tbb::this_task_arena::current_thread_index(), 0); | ||
if (tbb_slot >= interpreters_.size()) { | ||
throw std::runtime_error("Not enough interpreters allocated for number of tbb threads"); | ||
} | ||
auto &interpreter_data = interpreters_[tbb_slot]; | ||
return interpreter_data.Compute(vars); | ||
} | ||
|
||
tmva_helper_xgb::~tmva_helper_xgb() {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#ifndef ZHfunctions_H | ||
#define ZHfunctions_H | ||
|
||
#include <cmath> | ||
#include <vector> | ||
#include <math.h> | ||
|
||
#include "TLorentzVector.h" | ||
#include "ROOT/RVec.hxx" | ||
#include "edm4hep/ReconstructedParticleData.h" | ||
#include "edm4hep/MCParticleData.h" | ||
#include "edm4hep/ParticleIDData.h" | ||
#include "ReconstructedParticle2MC.h" | ||
|
||
|
||
namespace FCCAnalyses { | ||
|
||
// make Lorentzvectors from pseudojets | ||
Vec_tlv makeLorentzVectors(Vec_f jets_px, Vec_f jets_py, Vec_f jets_pz, Vec_f jets_e) { | ||
Vec_tlv result; | ||
for(int i=0; i<jets_px.size(); i++) { | ||
TLorentzVector tlv; | ||
tlv.SetPxPyPzE(jets_px[i], jets_py[i], jets_pz[i], jets_e[i]); | ||
result.push_back(tlv); | ||
} | ||
return result; | ||
} | ||
|
||
Vec_i jetTruthFinder(std::vector<std::vector<int>> constituents, Vec_rp reco, Vec_mc mc, Vec_i mcind, bool findGluons = false) { | ||
// jet truth=finder: match the gen-level partons (eventually with gluons) with the jet constituents | ||
// matching by mimimizing the sum of dr of the parton and all the jet constituents | ||
|
||
Vec_tlv genQuarks; // Lorentz-vector of potential partons (gen-level) | ||
Vec_i genQuarks_pdgId; // corresponding PDG ID | ||
for(size_t i = 0; i < mc.size(); ++i) { | ||
int pdgid = abs(mc.at(i).PDG); | ||
if(pdgid > 6 and not findGluons) continue; // only quarks | ||
if(pdgid > 6 and pdgid != 21 and findGluons) continue; // only quarks and gluons | ||
TLorentzVector tlv; | ||
tlv.SetXYZM(mc.at(i).momentum.x,mc.at(i).momentum.y,mc.at(i).momentum.z,mc.at(i).mass); | ||
genQuarks.push_back(tlv); | ||
genQuarks_pdgId.push_back(mc.at(i).PDG); | ||
} | ||
|
||
Vec_tlv recoParticles; // Lorentz-vector of all reconstructed particles | ||
for(size_t i = 0; i < reco.size(); ++i) { | ||
auto & p = reco[i]; | ||
TLorentzVector tlv; | ||
tlv.SetXYZM(p.momentum.x, p.momentum.y, p.momentum.z, p.mass); | ||
recoParticles.push_back(tlv); | ||
} | ||
|
||
Vec_i usedIdx; | ||
Vec_i result; | ||
for(size_t iJet = 0; iJet < constituents.size(); ++iJet) { | ||
Vec_d dr; | ||
for(size_t iGen = 0; iGen < genQuarks.size(); ++iGen) { | ||
if(std::find(usedIdx.begin(), usedIdx.end(), iGen) != usedIdx.end()) { | ||
dr.push_back(1e99); // set infinite dr, skip | ||
continue; | ||
} | ||
dr.push_back(0); | ||
for(size_t i = 0; i < constituents[iJet].size(); ++i) { | ||
dr[iGen] += recoParticles[constituents[iJet][i]].DeltaR(genQuarks[iGen]); | ||
} | ||
} | ||
int maxDrIdx = std::min_element(dr.begin(),dr.end()) - dr.begin(); | ||
usedIdx.push_back(maxDrIdx); | ||
result.push_back(genQuarks_pdgId[maxDrIdx]); | ||
|
||
} | ||
return result; | ||
} | ||
|
||
|
||
|
||
|
||
|
||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
|
||
processList = { | ||
'wzp6_ee_mumuH_Hbb_ecm240': {'fraction': 1}, | ||
} | ||
|
||
# Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics (mandatory) | ||
prodTag = "FCCee/winter2023/IDEA/" | ||
|
||
# Link to the dictonary that contains all the cross section informations etc... (mandatory) | ||
procDict = "FCCee_procDict_winter2023_IDEA.json" | ||
|
||
# additional/custom C++ functions, defined in header files (optional) | ||
includePaths = ["functions.h"] | ||
|
||
#Optional: output directory, default is local running directory | ||
outputDir = f"outputs/FCCee/higgs/jetclustering/histmaker/" | ||
|
||
# optional: ncpus, default is 4, -1 uses all cores available | ||
nCPUS = -1 | ||
|
||
# scale the histograms with the cross-section and integrated luminosity | ||
doScale = True | ||
intLumi = 7200000.0 # 7.2 /ab | ||
|
||
|
||
# define some binning for various histograms | ||
bins_p_mu = (250, 0, 250) | ||
bins_m_ll = (250, 0, 250) | ||
bins_p_ll = (250, 0, 250) | ||
bins_recoil = (200, 120, 140) | ||
bins_pdgid = (51, -25.5, 25.5) | ||
bins_dijet_m = (80, 70, 150) | ||
|
||
|
||
# build_graph function that contains the analysis logic, cuts and histograms (mandatory) | ||
def build_graph(df, dataset): | ||
|
||
results = [] | ||
df = df.Define("weight", "1.0") | ||
weightsum = df.Sum("weight") | ||
|
||
df = df.Alias("Particle0", "Particle#0.index") | ||
df = df.Alias("Particle1", "Particle#1.index") | ||
df = df.Alias("MCRecoAssociations0", "MCRecoAssociations#0.index") | ||
df = df.Alias("MCRecoAssociations1", "MCRecoAssociations#1.index") | ||
|
||
# select muons from Z decay and form Z/recoil mass | ||
df = df.Alias("Muon0", "Muon#0.index") | ||
df = df.Define("muons_all", "FCCAnalyses::ReconstructedParticle::get(Muon0, ReconstructedParticles)") | ||
df = df.Define("muons", "FCCAnalyses::ReconstructedParticle::sel_p(25)(muons_all)") | ||
df = df.Define("muons_p", "FCCAnalyses::ReconstructedParticle::get_p(muons)") | ||
df = df.Define("muons_no", "FCCAnalyses::ReconstructedParticle::get_n(muons)") | ||
df = df.Filter("muons_no >= 2") | ||
|
||
df = df.Define("zmumu", "ReconstructedParticle::resonanceBuilder(91)(muons)") | ||
df = df.Define("zmumu_m", "ReconstructedParticle::get_mass(zmumu)[0]") | ||
df = df.Define("zmumu_p", "ReconstructedParticle::get_p(zmumu)[0]") | ||
df = df.Define("zmumu_recoil", "ReconstructedParticle::recoilBuilder(240)(zmumu)") | ||
df = df.Define("zmumu_recoil_m", "ReconstructedParticle::get_mass(zmumu_recoil)[0]") | ||
|
||
# basic selection | ||
df = df.Filter("zmumu_m > 70 && zmumu_m < 100") | ||
df = df.Filter("zmumu_p > 20 && zmumu_p < 70") | ||
df = df.Filter("zmumu_recoil_m < 140 && zmumu_recoil_m > 120") | ||
|
||
|
||
# do jet clustering on all particles, except the muons | ||
df = df.Define("rps_no_muons", "FCCAnalyses::ReconstructedParticle::remove(ReconstructedParticles, muons)") | ||
df = df.Define("RP_px", "FCCAnalyses::ReconstructedParticle::get_px(rps_no_muons)") | ||
df = df.Define("RP_py", "FCCAnalyses::ReconstructedParticle::get_py(rps_no_muons)") | ||
df = df.Define("RP_pz","FCCAnalyses::ReconstructedParticle::get_pz(rps_no_muons)") | ||
df = df.Define("RP_e", "FCCAnalyses::ReconstructedParticle::get_e(rps_no_muons)") | ||
df = df.Define("pseudo_jets", "FCCAnalyses::JetClusteringUtils::set_pseudoJets(RP_px, RP_py, RP_pz, RP_e)") | ||
|
||
|
||
# Implemented algorithms and arguments: https://github.com/HEP-FCC/FCCAnalyses/blob/master/addons/FastJet/JetClustering.h | ||
# More info: https://indico.cern.ch/event/1173562/contributions/4929025/attachments/2470068/4237859/2022-06-FCC-jets.pdf | ||
df = df.Define("clustered_jets", "JetClustering::clustering_ee_kt(2, 2, 0, 10)(pseudo_jets)") # 2-jet exclusive clustering | ||
|
||
df = df.Define("jets", "FCCAnalyses::JetClusteringUtils::get_pseudoJets(clustered_jets)") | ||
df = df.Define("jetconstituents", "FCCAnalyses::JetClusteringUtils::get_constituents(clustered_jets)") # one-to-one mapping to the input collection (rps_no_muons) | ||
df = df.Define("jets_e", "FCCAnalyses::JetClusteringUtils::get_e(jets)") | ||
df = df.Define("jets_px", "FCCAnalyses::JetClusteringUtils::get_px(jets)") | ||
df = df.Define("jets_py", "FCCAnalyses::JetClusteringUtils::get_py(jets)") | ||
df = df.Define("jets_pz", "FCCAnalyses::JetClusteringUtils::get_pz(jets)") | ||
df = df.Define("jets_phi", "FCCAnalyses::JetClusteringUtils::get_phi(jets)") | ||
df = df.Define("jets_m", "FCCAnalyses::JetClusteringUtils::get_m(jets)") | ||
|
||
# convert jets to LorentzVectors | ||
df = df.Define("jets_tlv", "FCCAnalyses::makeLorentzVectors(jets_px, jets_py, jets_pz, jets_e)") | ||
df = df.Define("jets_truth", "FCCAnalyses::jetTruthFinder(jetconstituents, rps_no_muons, Particle, MCRecoAssociations1)") # returns best-matched PDG ID of the jets | ||
df = df.Define("dijet_higgs_m", "(jets_tlv[0]+jets_tlv[1]).M()") | ||
|
||
# define histograms | ||
results.append(df.Histo1D(("zmumu_m", "", *bins_m_ll), "zmumu_m")) | ||
results.append(df.Histo1D(("zmumu_p", "", *bins_p_ll), "zmumu_p")) | ||
results.append(df.Histo1D(("zmumu_recoil_m", "", *bins_recoil), "zmumu_recoil_m")) | ||
|
||
results.append(df.Histo1D(("jets_truth", "", *bins_pdgid), "jets_truth")) | ||
results.append(df.Histo1D(("dijet_higgs_m", "", *bins_dijet_m), "dijet_higgs_m")) | ||
|
||
return results, weightsum | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can those functions be moved to the
analyzers/dataframe
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will do a major port of all the C++ functions in a subsequent PR (will add some more functions as well).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK