From 5d6d997f9d2b5bc643bfa560a4b107137bdf51be Mon Sep 17 00:00:00 2001 From: bistapf <79460851+bistapf@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:54:21 +0200 Subject: [PATCH] Adding FCC-hh analysis example, code and event weight processing (#411) * removing outdated FCChh examples that no longer run * init bbyy working example for FCChh * copy old FCChh analyzers * update collection names in get_tagged_jets functions - not functional yet * rewrite of get_tagged_jets to account for reversed link direction PID->RP in latest edm4hep versions * apply clang format * first versions of FCChh example final and plotting scripts * add first version of using event weights for scaling etc * fix function to get_tagged jets following PR#131 in k4SimDelphes * clean up debug prints * add bbyy normalisation and adapt to splitting b and tau-tags in k4simdelphes * FCC-hh bbyy mini example the old way, pre-datasource * clang formatting * add fcchh analysis to test suite * add do_weighted option to config dict, clean up comments * add printing of selected weighted events * fix writing sow to output tree * add check that weight column exists, comments from pr411 review * add do_weighted option to man page * clean up FCChh bbyy example, add example for checking btag efficiencies from delphes --- .../dataframe/FCCAnalyses/Analysis_FCChh.h | 490 +++ analyzers/dataframe/src/Analysis_FCChh.cc | 3577 +++++++++++++++++ examples/FCChh/HH_bbtautau/CMakeLists.txt | 3 - examples/FCChh/HH_bbtautau/analysis.cc | 82 - examples/FCChh/HH_bbtautau/analysis.py | 77 - examples/FCChh/HH_bbtautau/finalSel.py | 47 - examples/FCChh/HH_bbtautau/plots.py | 40 - examples/FCChh/HH_bbtautau/preSel.py | 26 - examples/FCChh/ggHH_bbyy/analysis_final.py | 50 + .../FCChh/ggHH_bbyy/analysis_plot_tag_eff.py | 156 + examples/FCChh/ggHH_bbyy/analysis_plots.py | 37 + examples/FCChh/ggHH_bbyy/analysis_stage1.py | 143 + examples/FCChh/ggHH_bbyy/plot_tag_eff.py | 199 + examples/FCChh/ttHH/analysis.py | 71 - examples/FCChh/ttHH/finalSel.py | 47 - examples/FCChh/ttHH/plots.py | 47 - examples/FCChh/ttHH/preSel.py | 18 - examples/FCChh/tth_4l/CMakeLists.txt | 4 - examples/FCChh/tth_4l/Makefile | 19 - examples/FCChh/tth_4l/fcchh_ana_tth_4l.cxx | 101 - examples/FCChh/tth_4l/fcchh_ana_tth_4l.py | 71 - examples/FCChh/tth_4l/run.py | 41 - man/man7/fccanalysis-script.7 | 7 + python/process.py | 66 + python/run_fccanalysis.py | 98 +- python/run_final_analysis.py | 114 +- tests/CMakeLists.txt | 2 + 27 files changed, 4905 insertions(+), 728 deletions(-) create mode 100644 analyzers/dataframe/FCCAnalyses/Analysis_FCChh.h create mode 100644 analyzers/dataframe/src/Analysis_FCChh.cc delete mode 100644 examples/FCChh/HH_bbtautau/CMakeLists.txt delete mode 100644 examples/FCChh/HH_bbtautau/analysis.cc delete mode 100644 examples/FCChh/HH_bbtautau/analysis.py delete mode 100644 examples/FCChh/HH_bbtautau/finalSel.py delete mode 100644 examples/FCChh/HH_bbtautau/plots.py delete mode 100644 examples/FCChh/HH_bbtautau/preSel.py create mode 100644 examples/FCChh/ggHH_bbyy/analysis_final.py create mode 100644 examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py create mode 100644 examples/FCChh/ggHH_bbyy/analysis_plots.py create mode 100644 examples/FCChh/ggHH_bbyy/analysis_stage1.py create mode 100644 examples/FCChh/ggHH_bbyy/plot_tag_eff.py delete mode 100644 examples/FCChh/ttHH/analysis.py delete mode 100644 examples/FCChh/ttHH/finalSel.py delete mode 100644 examples/FCChh/ttHH/plots.py delete mode 100644 examples/FCChh/ttHH/preSel.py delete mode 100644 examples/FCChh/tth_4l/CMakeLists.txt delete mode 100644 examples/FCChh/tth_4l/Makefile delete mode 100644 examples/FCChh/tth_4l/fcchh_ana_tth_4l.cxx delete mode 100644 examples/FCChh/tth_4l/fcchh_ana_tth_4l.py delete mode 100644 examples/FCChh/tth_4l/run.py diff --git a/analyzers/dataframe/FCCAnalyses/Analysis_FCChh.h b/analyzers/dataframe/FCCAnalyses/Analysis_FCChh.h new file mode 100644 index 0000000000..e2fa37bb03 --- /dev/null +++ b/analyzers/dataframe/FCCAnalyses/Analysis_FCChh.h @@ -0,0 +1,490 @@ +// library with extra functions needed by custom FCC-hh analysis, such as +// HHbbZZ(llvv) analysis + +#ifndef ANALYSIS_FCCHH_ANALYZERS_H +#define ANALYSIS_FCCHH_ANALYZERS_H + +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" +#include "TString.h" + +#include "edm4hep/MCParticleData.h" +#include "edm4hep/ParticleIDData.h" +#include "edm4hep/ReconstructedParticleData.h" +#include "podio/ObjectID.h" + +#include + +namespace AnalysisFCChh { + +/// TESTER: return the transverse momenta of the input ReconstructedParticles +// ROOT::VecOps::RVec +// get_pt_test(ROOT::VecOps::RVec in); + +// helpers for reco particles: +TLorentzVector getTLV_reco(edm4hep::ReconstructedParticleData reco_part); +TLorentzVector getTLV_MC(edm4hep::MCParticleData MC_part); + +// struct to use for a pair of two reco particles, to make sure the correct ones +// stay together +struct RecoParticlePair { + edm4hep::ReconstructedParticleData particle_1; + edm4hep::ReconstructedParticleData particle_2; + TLorentzVector merged_TLV() { + TLorentzVector tlv_1 = getTLV_reco(particle_1); + TLorentzVector tlv_2 = getTLV_reco(particle_2); + return tlv_1 + tlv_2; + } + void sort_by_pT() { + double pT_1 = sqrt(particle_1.momentum.x * particle_1.momentum.x + + particle_1.momentum.y * particle_1.momentum.y); + double pT_2 = sqrt(particle_2.momentum.x * particle_2.momentum.x + + particle_2.momentum.y * particle_2.momentum.y); + + if (pT_1 >= pT_2) { + return; + } // nothing to do if already sorted corrected + else { + edm4hep::ReconstructedParticleData sublead = particle_1; + + particle_1 = particle_2; + particle_2 = sublead; + return; + } + } +}; + +// same for MC particle +struct MCParticlePair { + edm4hep::MCParticleData particle_1; + edm4hep::MCParticleData particle_2; + TLorentzVector merged_TLV() { + TLorentzVector tlv_1 = getTLV_MC(particle_1); + TLorentzVector tlv_2 = getTLV_MC(particle_2); + return tlv_1 + tlv_2; + } + void sort_by_pT() { + double pT_1 = sqrt(particle_1.momentum.x * particle_1.momentum.x + + particle_1.momentum.y * particle_1.momentum.y); + double pT_2 = sqrt(particle_2.momentum.x * particle_2.momentum.x + + particle_2.momentum.y * particle_2.momentum.y); + + if (pT_1 >= pT_2) { + return; + } // nothing to do if already sorted corrected + else { + edm4hep::MCParticleData sublead = particle_1; + + particle_1 = particle_2; + particle_2 = sublead; + return; + } + } +}; + +// merge the particles in such a pair into one edm4hep:RecoParticle to use with +// other functions (in a vector) +ROOT::VecOps::RVec +merge_pairs(ROOT::VecOps::RVec pairs); +int get_n_pairs(ROOT::VecOps::RVec pairs); +ROOT::VecOps::RVec +get_first_pair(ROOT::VecOps::RVec + pairs); // can use to get leading pair if the inputs to pair + // finding fct were pT sorted + +// functions to separate the pair again - ONLY DOES THIS FOR THE FIRST PAIR IN +// THE VECTOR +ROOT::VecOps::RVec +get_first_from_pair(ROOT::VecOps::RVec pairs); +ROOT::VecOps::RVec +get_second_from_pair(ROOT::VecOps::RVec pairs); + +// truth filter used to get ZZ(llvv) events from the ZZ(llvv+4l+4v) inclusive +// signal samples +bool ZZllvvFilter(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids); +bool WWlvlvFilter(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids); + +// helper functions for the ZZllv truth filter: +bool isStablePhoton(edm4hep::MCParticleData truth_part); +bool isPhoton(edm4hep::MCParticleData truth_part); +bool isLep(edm4hep::MCParticleData truth_part); +bool isLightLep(edm4hep::MCParticleData truth_part); +bool isNeutrino(edm4hep::MCParticleData truth_part); +bool isQuark(edm4hep::MCParticleData truth_part); +bool isZ(edm4hep::MCParticleData truth_part); +bool isW(edm4hep::MCParticleData truth_part); +bool isTau(edm4hep::MCParticleData truth_part); +bool isH(edm4hep::MCParticleData truth_part); +bool isb(edm4hep::MCParticleData truth_part); +bool isHadron(edm4hep::MCParticleData truth_part); +bool isTop(edm4hep::MCParticleData truth_part); +bool isGluon(edm4hep::MCParticleData truth_part); +bool isc(edm4hep::MCParticleData truth_part); +bool iss(edm4hep::MCParticleData truth_part); +bool isMuon(edm4hep::MCParticleData truth_part); +int checkZDecay(edm4hep::MCParticleData truth_Z, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec truth_particles); +int checkWDecay(edm4hep::MCParticleData truth_W, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec truth_particles); +int findTopDecayChannel( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids); +int findHiggsDecayChannel( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids); + +// truth level fct to get a Z->ll truth decay +ROOT::VecOps::RVec +getTruthZll(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids); + +// find the SFOS pair of reconstructed leptons (electrons or muons) +ROOT::VecOps::RVec +getOSPairs(ROOT::VecOps::RVec leptons_in); +ROOT::VecOps::RVec getDFOSPairs( + ROOT::VecOps::RVec electrons_in, + ROOT::VecOps::RVec muons_in); +// ROOT::VecOps::RVec +// getOSPair(ROOT::VecOps::RVec leptons_in); +ROOT::VecOps::RVec getBestOSPair( + ROOT::VecOps::RVec electron_pairs, + ROOT::VecOps::RVec muon_pairs); // closest to Z mass +ROOT::VecOps::RVec +getLeadingPair(ROOT::VecOps::RVec electron_pairs, + ROOT::VecOps::RVec + muon_pairs); // pair with leading pT(pair) + +// make a general pair, not caring about charges, e.g. the two b-jets +ROOT::VecOps::RVec +getPairs(ROOT::VecOps::RVec particles_in); +ROOT::VecOps::RVec getPair_sublead( + ROOT::VecOps::RVec particles_in); +ROOT::VecOps::RVec +getPairs(ROOT::VecOps::RVec particles_in); + +// SORT OBJ COLLECTION +ROOT::VecOps::RVec SortParticleCollection( + ROOT::VecOps::RVec particles_in); + +// btags +ROOT::VecOps::RVec +getJet_tag(ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec values, int algoIndex); +ROOT::VecOps::RVec +getBhadron(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +getChadron(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +get_tagged_jets(ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec jet_tags, + ROOT::VecOps::RVec jet_tags_indices, + ROOT::VecOps::RVec jet_tags_values, int algoIndex); +ROOT::VecOps::RVec +get_untagged_jets(ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec values, int algoIndex); + +// tau jets +ROOT::VecOps::RVec find_truth_matches( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres); +ROOT::VecOps::RVec +get_tau_jets(ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec tag_values, int algoIndex); +ROOT::VecOps::RVec +getTruthTauHads(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type); +ROOT::VecOps::RVec +getTruthTau(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type); +ROOT::VecOps::RVec +getTruthTauLeps(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type); +// isolation: select only those particles of sel_parts that are isolated by the +// given dR from the check_parts +ROOT::VecOps::RVec +sel_isolated(ROOT::VecOps::RVec sel_parts, + ROOT::VecOps::RVec check_parts, + float dR_thres = 0.4); + +// merge two four vectors into one to create a new particle (follow vector +// structure to be able to use with other RecoParticle fcts easily like get_pt +// etc.) +ROOT::VecOps::RVec merge_parts_TLVs( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2); +ROOT::VecOps::RVec +merge_parts_TLVs(ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2); + +// reco level quantities +// transverse masses: +ROOT::VecOps::RVec +get_mT(ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2); +ROOT::VecOps::RVec +get_mT_new(ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2); +ROOT::VecOps::RVec +get_m_pseudo(ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj); +ROOT::VecOps::RVec +get_mT_pseudo(ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj); +TLorentzVector getTLV_MET(edm4hep::ReconstructedParticleData met_object); + +// stransverse mass mT2 : +// https://www.hep.phy.cam.ac.uk/~lester/mt2/#Alternatives +// ROOT::VecOps::RVec +// get_mT2(ROOT::VecOps::RVec particle_1, +// ROOT::VecOps::RVec particle_2, +// ROOT::VecOps::RVec MET_obj); +// ROOT::VecOps::RVec +// get_mT2_125(ROOT::VecOps::RVec +// particle_1, ROOT::VecOps::RVec +// particle_2, ROOT::VecOps::RVec MET_obj); + +// angular distances:function can return dR, dEta, dPhi for any two fully +// reconstructed particles that have a full 4 vector +ROOT::VecOps::RVec get_angularDist( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2, + TString type = "dR"); +ROOT::VecOps::RVec get_angularDist_MET( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec MET_obj, + TString type = "dR"); + +ROOT::VecOps::RVec +get_angularDist_pair(ROOT::VecOps::RVec pairs, + TString type = "dR"); +ROOT::VecOps::RVec +get_angularDist_pair(ROOT::VecOps::RVec pairs, + TString type = "dR"); + +// HT variables +ROOT::VecOps::RVec +get_HT2(ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2); +ROOT::VecOps::RVec +get_HT_wInv(ROOT::VecOps::RVec MET, + ROOT::VecOps::RVec ll_pair, + ROOT::VecOps::RVec bb_pair); +ROOT::VecOps::RVec +get_HT_true(ROOT::VecOps::RVec ll_pair, + ROOT::VecOps::RVec bb_pair); +ROOT::VecOps::RVec get_HT2_ratio(ROOT::VecOps::RVec HT2, + ROOT::VecOps::RVec HT_wInv); +ROOT::VecOps::RVec +get_MET_significance(ROOT::VecOps::RVec MET, + ROOT::VecOps::RVec HT_true, bool doSqrt = true); + +// reco mass of lepton+b-jet, to try suppress ttbar processes +ROOT::VecOps::RVec +make_lb_pairing(ROOT::VecOps::RVec lepton_pair, + ROOT::VecOps::RVec bb_pair); +ROOT::VecOps::RVec +get_mlb_reco(ROOT::VecOps::RVec lb_pairs); +ROOT::VecOps::RVec +get_mlb_MET_reco(ROOT::VecOps::RVec lb_pairs, + ROOT::VecOps::RVec MET); + +// for separating bbWW and bbtautau? +ROOT::VecOps::RVec +get_pzeta_vis(ROOT::VecOps::RVec lepton_pair); +ROOT::VecOps::RVec +get_pzeta_miss(ROOT::VecOps::RVec lepton_pair, + ROOT::VecOps::RVec MET); +ROOT::VecOps::RVec get_dzeta(ROOT::VecOps::RVec pzeta_miss, + ROOT::VecOps::RVec pzeta_vis, + float factor = 0.85); + +// combine particles: +ROOT::VecOps::RVec +build_HZZ(ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj); + +// retrieve children from a given truth particle +ROOT::VecOps::RVec get_immediate_children( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids); + +// select the truth Higgs, depending on which particles it decays to: +ROOT::VecOps::RVec +get_truth_Higgs(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + TString decay = "ZZ"); +ROOT::VecOps::RVec +get_truth_Z_decay(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + TString decay = "ZZ"); + +// Filters and specifics for the bbtautau analysis: +bool isFromHadron(edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +bool hasHiggsParent( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +bool isFromHiggsDirect( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +bool isChildOfTauFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +bool isChildOfWFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +bool isChildOfZFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles); +ROOT::VecOps::RVec +getLepsFromTau(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +getLepsFromW(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +getLepsFromZ(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +getPhotonsFromH(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec getTruthLepLepFlavour( + ROOT::VecOps::RVec leps_from_tau); +ROOT::VecOps::RVec +getTruthEle(ROOT::VecOps::RVec leps_from_tau); +ROOT::VecOps::RVec +getTruthMu(ROOT::VecOps::RVec leps_from_tau); + +// tautau specific masses/variables +ROOT::VecOps::RVec get_x_fraction( + ROOT::VecOps::RVec visible_particle, + ROOT::VecOps::RVec MET); +ROOT::VecOps::RVec get_x_fraction_truth( + ROOT::VecOps::RVec visible_particle, + ROOT::VecOps::RVec MET); +ROOT::VecOps::RVec get_mtautau_col( + ROOT::VecOps::RVec ll_pair_merged, + ROOT::VecOps::RVec x1, ROOT::VecOps::RVec x2); +ROOT::VecOps::RVec get_mbbtautau_col( + ROOT::VecOps::RVec bb_pair_merged, + ROOT::VecOps::RVec mtautau_col); + +// truth matching: +// old function that can match only 1 particle -> TO REMOVE? +ROOT::VecOps::RVec find_reco_matched( + ROOT::VecOps::RVec truth_parts_to_match, + ROOT::VecOps::RVec reco_parts_all, + float dR_thres = 0.1); + +// isolation criterion, delphes style. flag exclude_light_leps does not check +// for isolation of test_parts vs electrons or muons (using the mass) as seems +// to be done in FCC-hh delphes sim +ROOT::VecOps::RVec get_IP_delphes( + ROOT::VecOps::RVec test_parts, + ROOT::VecOps::RVec reco_parts_all, + float dR_min = 0.3, float pT_min = 0.5, bool exclude_light_leps = true); + +ROOT::VecOps::RVec +filter_lightLeps(ROOT::VecOps::RVec recind, ROOT::VecOps::RVec mcind, + ROOT::VecOps::RVec reco, + ROOT::VecOps::RVec mc); + +// truth MET +ROOT::VecOps::RVec +getNusFromW(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids); +ROOT::VecOps::RVec +getTruthMETObj(ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids, + TString type = "hww_only"); + +// for checking signal efficiencies in delphes card validation +ROOT::VecOps::RVec find_reco_matches( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres = 0.1); +ROOT::VecOps::RVec +find_reco_matches_no_remove( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres = 0.1); +ROOT::VecOps::RVec +find_reco_matches_exclusive( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec truth_parts_exc, + ROOT::VecOps::RVec reco_particles, + float dR_thres = 0.1); +ROOT::VecOps::RVec find_reco_match_indices( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres = 0.1); +ROOT::VecOps::RVec +find_reco_matched_particle( + edm4hep::MCParticleData truth_part_to_match, + ROOT::VecOps::RVec check_reco_parts, + float dR_thres = 0.1); +ROOT::VecOps::RVec find_mc_matched_particle( + edm4hep::ReconstructedParticleData reco_part_to_match, + ROOT::VecOps::RVec check_mc_parts, + float dR_thres = 0.1); +ROOT::VecOps::RVec find_reco_matched_index( + edm4hep::MCParticleData truth_part_to_match, + ROOT::VecOps::RVec check_reco_parts, + float dR_thres = 0.1); +ROOT::VecOps::RVec +find_true_signal_leps_reco_matches( + ROOT::VecOps::RVec truth_leps_to_match, + ROOT::VecOps::RVec reco_electrons, + ROOT::VecOps::RVec reco_muons, + float dR_thres = 0.1); +ROOT::VecOps::RVec find_truth_to_reco_matches_indices( + ROOT::VecOps::RVec truth_leps_to_match, + ROOT::VecOps::RVec reco_parts, + int pdg_ID, float dR_thres = 0.1); + +// retrieving isoVar from delphes: +// ROOT::VecOps::RVec +// get_isoVar(ROOT::VecOps::RVec +// reco_parts_to_check, ROOT::VecOps::RVec +// all_reco_parts); + +// template function for getting vector via indices - needed to read e.g. +// UserDataCollections +template +ROOT::VecOps::RVec get(const ROOT::VecOps::RVec &index, + const ROOT::VecOps::RVec &in) { + ROOT::VecOps::RVec result; + result.reserve(index.size()); + for (size_t i = 0; i < index.size(); ++i) { + if (index[i] > -1) + result.push_back(in[index[i]]); + } + return result; +} + +} // namespace AnalysisFCChh + +#endif diff --git a/analyzers/dataframe/src/Analysis_FCChh.cc b/analyzers/dataframe/src/Analysis_FCChh.cc new file mode 100644 index 0000000000..8033813f67 --- /dev/null +++ b/analyzers/dataframe/src/Analysis_FCChh.cc @@ -0,0 +1,3577 @@ +#include "FCCAnalyses/Analysis_FCChh.h" +// #include "FCCAnalyses/lester_mt2_bisect.h" + +#include + +using namespace AnalysisFCChh; + +// truth filter helper functions: +bool AnalysisFCChh::isStablePhoton(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 22 && truth_part.generatorStatus == 1) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isPhoton(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 22) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isLep(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 11 || abs(pdg_id) == 13 || abs(pdg_id) == 15) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isLightLep( + edm4hep::MCParticleData truth_part) // only electrons or muons, no taus +{ + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 11 || abs(pdg_id) == 13) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isNeutrino(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 12 || abs(pdg_id) == 14 || abs(pdg_id) == 16) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isQuark(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) >= 1 && abs(pdg_id) <= 6) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isZ(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 23) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isW(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 24) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isTau(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 15) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isH(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 25) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isb(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 5) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isHadron(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) >= 100) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isTop(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 6) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isGluon(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 21) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isc(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 4) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::iss(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 3) { + return true; + } else { + return false; + } +} + +bool AnalysisFCChh::isMuon(edm4hep::MCParticleData truth_part) { + auto pdg_id = truth_part.PDG; + // std::cout << "pdg id of truth part is" << pdg_id << std::endl; + if (abs(pdg_id) == 13) { + return true; + } else { + return false; + } +} + +// check if a truth particle came from a hadron decay, needed to veto taus that +// come from b-meson decays in the bbtautau samples +bool AnalysisFCChh::isFromHadron( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually onle 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + // std::cout << "Found parent of the tau as:" << parent.PDG << std::endl; + if (abs(parent.PDG) >= 100) { + return true; + } + } + return false; +} + +// check if a truth particle had a Higgs as a parent somewhere up the chain +bool AnalysisFCChh::hasHiggsParent( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually onle 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + // std::cout << "Found parent of the tau as:" << parent.PDG << std::endl; + if (isH(parent)) { + return true; + } + return hasHiggsParent(parent, parent_ids, truth_particles); + } + + return false; +} + +// check if the immediate parent of a particle is a Higgs +bool AnalysisFCChh::isFromHiggsDirect( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually only 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + // std::cout << "Found parent of the tau as:" << parent.PDG << std::endl; + if (isH(parent)) { + return true; + } + } + + return false; +} + +// check if a particle came from a tau that itself came from a Higgs, and not +// ever from a hadron +bool AnalysisFCChh::isChildOfTauFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually onle 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + if (isTau(parent)) { + // veto taus from b-decays + if (isFromHadron(parent, parent_ids, truth_particles)) { + return false; + } + if (hasHiggsParent(parent, parent_ids, truth_particles)) { + return true; + } + } + } + return false; +} + +// check if a particle came from a Z that itself came from a Higgs, and not ever +// from a hadron +bool AnalysisFCChh::isChildOfZFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually onle 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + if (isZ(parent)) { + // veto taus from b-decays + if (isFromHadron(parent, parent_ids, truth_particles)) { + return false; + } + if (hasHiggsParent(parent, parent_ids, truth_particles)) { + return true; + } + } + } + return false; +} + +// check if a particle came from a W that itself came from a Higgs, and not ever +// from a hadron +bool AnalysisFCChh::isChildOfWFromHiggs( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec parent_ids, + ROOT::VecOps::RVec truth_particles) { + // cannot use the podio getParents fct, need to implement manually: + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + + // loop over all parents (usually only 1, but sometimes more for reasons not + // understood?): + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + // first get the index from the parent + auto parent_MC_index = parent_ids.at(parent_i).index; + + // then go back to the original vector of MCParticles + auto parent = truth_particles.at(parent_MC_index); + + if (isW(parent)) { + // veto Ws from b-decays + if (isFromHadron(parent, parent_ids, truth_particles)) { + return false; + } + if (hasHiggsParent(parent, parent_ids, truth_particles)) { + return true; + } + } + } + return false; +} + +// check what type the Z decay is: to ll or vv +int AnalysisFCChh::checkZDecay( + edm4hep::MCParticleData truth_Z, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec truth_particles) { + + auto first_child_index = truth_Z.daughters_begin; + auto last_child_index = truth_Z.daughters_end; + + if (last_child_index - first_child_index != 2) { + std::cout << "Error in checkZDecay! Found more or fewer than exactly 2 " + "daughters of a Z boson - this is not expected by code. Need " + "to implement a solution still!" + << std::endl; + return 0; + } + + // now get the indices in the daughters vector + auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // std::cout << "Daughters run from: " << child_1_MC_index << " to " << + // child_2_MC_index << std::endl; + + // then go back to the original vector of MCParticles + auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + if (isLep(child_1) && isLep(child_2)) { + return 1; + } else if (isNeutrino(child_1) && isNeutrino(child_2)) { + return 2; + } else { + std::cout << "Found different decay of Z boson than 2 leptons (e or mu), " + "neutrinos or taus! Please check." + << std::endl; + return 0; + } +} + +// check what type the W decay is: to lv or qq +int AnalysisFCChh::checkWDecay( + edm4hep::MCParticleData truth_W, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec truth_particles) { + + auto first_child_index = truth_W.daughters_begin; + auto last_child_index = truth_W.daughters_end; + auto children_size = last_child_index - first_child_index; + + if (children_size < 1) { + std::cout + << "Error in checkWDecay! Checking W with no daughters, returning 0." + << std::endl; + return 0; + } + + // have at least 1 child -> if its also a W, continue the chain -> skip the + // intermediate Ws, and those that radiated photon + auto child_1_index = daughter_ids.at(first_child_index).index; + auto child_1 = truth_particles.at(child_1_index); + + // if the child of the W is also a W, use that one + if (isW(child_1)) { + return checkWDecay(child_1, daughter_ids, truth_particles); + } + + if (children_size != 2) { + std::cout << "Error in checkWDecay! Found unexpected W decay, please check." + << std::endl; + + return 0; + } + + // now get the indices in the daughters vector + // auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // std::cout << "Daughters run from: " << child_1_MC_index << " to " << + // child_2_MC_index << std::endl; + + // then go back to the original vector of MCParticles + // auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + // debug + // std::cout << "Checking W decay: PDGID 1 = " << child_1.PDG << " and PDGID + // 2 = " << child_2.PDG << std::endl; + + if (isLep(child_1) && isNeutrino(child_2)) { + return 1; + } else if (isNeutrino(child_1) && isLep(child_2)) { + return 1; + } else if (isQuark(child_1) && isQuark(child_2)) { + return 2; + } + + else { + std::cout << "Found different decay of W boson than lv or qq! Please check." + << std::endl; + std::cout << "PDGID 1 = " << child_1.PDG << " and PDGID 2 = " << child_2.PDG + << std::endl; + return 0; + } +} + +// check top decay: use to see fraction of dileptionic ttbar events at pre-sel +int AnalysisFCChh::findTopDecayChannel( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids) { + + // std::cout << "Checking truth ttbar decay type" << std::endl; + + // std::vector top_list; // do i need this? + int ttbar_decay_type = 1; + + for (auto &truth_part : truth_particles) { + if (isTop(truth_part)) { + // check what children the top has: + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + + auto children_size = last_child_index - first_child_index; + + // skip intermediate tops that just have another top as children + if (last_child_index - first_child_index != 2) { + continue; + } + + // get the pdg ids of the children + // now get the indices in the daughters vector + auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // then go back to the original vector of MCParticles + auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + // std::cout << "PDG ID of first child: " << child_1.PDG << std::endl; + // std::cout << "PDG ID of second child: " << child_2.PDG << std::endl; + + // skip the case where a top radiated a gluon + if (isTop(child_1) || isTop(child_2)) { + continue; + } + + int w_decay_type = 0; + + if (isW(child_1) && !isW(child_2)) { + w_decay_type = checkWDecay(child_1, daughter_ids, truth_particles); + } + + else if (isW(child_2)) { + w_decay_type = checkWDecay(child_2, daughter_ids, truth_particles); + } + + else { + std::cout << "Warning! Found two or Ws from top decay. Please check. " + "Skipping top." + << std::endl; + continue; + } + + // std::cout << "Code for W decay is = " << w_decay_type << std::endl; + + // codes from W decays are: 1 = W->lv and 2 W->qq, 0 for error + // ttbar code is the code W1*W2 so that its 0 in case of any error + // ttbar codes: 1=dileptonic, 2=semileptonic, 3=hadronic + ttbar_decay_type *= w_decay_type; + + // top_list.push_back(truth_part); + } + } + // std::cout << "Number of tops in event :" << top_list.size()<< std::endl; + // std::cout << "Code for ttbar decay is = " << ttbar_decay_type << std::endl; + return ttbar_decay_type; +} + +// check Higgs decay: use to see if can improve stats for single Higgs bkg with +// exclusive samples +int AnalysisFCChh::findHiggsDecayChannel( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids) { + + // std::cout << "Checking truth Higgs decay type" << std::endl; + + // std::vector higgs_list; // FOR DEBUG + + int higgs_decay_type = 0; + + for (auto &truth_part : truth_particles) { + if (isH(truth_part)) { + // check what children the top has: + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + + auto children_size = last_child_index - first_child_index; + + // skip intermediate tops that just have another Higgs as children + if (last_child_index - first_child_index != 2) { + continue; + } + + // higgs_list.push_back(truth_part); + + // get the pdg ids of the children + // now get the indices in the daughters vector + auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // then go back to the original vector of MCParticles + auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + // std::cout << "Found Higgs with two children:" << std::endl; + // std::cout << "PDG ID of first child: " << child_1.PDG << std::endl; + // std::cout << "PDG ID of second child: " << child_2.PDG << std::endl; + + // to check + // //skip the case where a top radiated a gluon + // if (isTop(child_1) || isTop(child_2)){ + // continue; + // } + + // Higgs decay types: + // 1: Hbb, 2: HWW, 3: Hgg, 4: Htautau, 5: Hcc, 6:HZZ, 7:Hyy, 8:HZy, + // 9:Hmumu, 10:Hss + + if (isb(child_1) && isb(child_2)) { + higgs_decay_type = 1; + } + + else if (isW(child_1) && isW(child_2)) { + higgs_decay_type = 2; + } + + else if (isGluon(child_1) && isGluon(child_2)) { + higgs_decay_type = 3; + } + + else if (isTau(child_1) && isTau(child_2)) { + higgs_decay_type = 4; + } + + else if (isc(child_1) && isc(child_2)) { + higgs_decay_type = 5; + } + + else if (isZ(child_1) && isZ(child_2)) { + higgs_decay_type = 6; + } + + else if (isPhoton(child_1) && isPhoton(child_2)) { + higgs_decay_type = 7; + } + + else if ((isZ(child_1) && isPhoton(child_2)) || + (isZ(child_2) && isPhoton(child_1))) { + higgs_decay_type = 8; + } + + else if (isMuon(child_1) && isMuon(child_2)) { + higgs_decay_type = 9; + } + + else if (iss(child_1) && iss(child_2)) { + higgs_decay_type = 10; + } + + else { + std::cout << "Warning! Found unkown decay of Higgs!" << std::endl; + std::cout << "Pdg ids are " << child_1.PDG << " , " << child_2.PDG + << std::endl; + continue; + } + } + } + // std::cout << "Number of higgs in event :" << higgs_list.size()<< std::endl; + // std::cout << "Code for higgs decay is = " << higgs_decay_type << std::endl; + return higgs_decay_type; +} + +// truth filter used to get ZZ(llvv) events from the ZZ(llvv+4l+4v) inclusive +// signal samples +bool AnalysisFCChh::ZZllvvFilter( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids) { + // first scan through the truth particles to find Z bosons + std::vector z_list; + for (auto &truth_part : truth_particles) { + if (isZ(truth_part)) { + z_list.push_back(truth_part); + } + // Tau veto: + if (isTau(truth_part)) { + return false; + } + } + + // check how many Zs are in event and build the flag: + // std::cout << "Number of Zs" << z_list.size() << std::endl; + if (z_list.size() == 2) { + int z1_decay = checkZDecay(z_list.at(0), daughter_ids, truth_particles); + int z2_decay = checkZDecay(z_list.at(1), daughter_ids, truth_particles); + + int zz_decay_flag = z1_decay + z2_decay; + + // flags are Z(ll) =1 and Z(vv) =2, so flag for llvv is =3 (4l=2, 4v=4) + if (zz_decay_flag == 3) { + return true; + } else { + return false; + } + + } else { + return false; + } +} + +// truth filter used to get WW(lvlv) events from the inclusive bbWW samples +bool AnalysisFCChh::WWlvlvFilter( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids) { + // first scan through the truth particles to find Z bosons + std::vector w_list; + for (auto &truth_part : truth_particles) { + if (isW(truth_part) && + isFromHiggsDirect(truth_part, parent_ids, truth_particles)) { + w_list.push_back(truth_part); + } + // Tau veto: - actually probably doesnt work as intended, to revise!! + // if (isTau(truth_part)){ + // return false; + // } + } + + if (w_list.size() == 2) { + int w1_decay = checkWDecay(w_list.at(0), daughter_ids, truth_particles); + int w2_decay = checkWDecay(w_list.at(1), daughter_ids, truth_particles); + + int ww_decay_flag = w1_decay + w2_decay; + + // flags are W(lv) =1 and W(qq) =2, so flag for lvlvv is =2 (lvqq=3, 4q=4) + if (ww_decay_flag == 2) { + return true; + } else { + return false; + } + + } else { + return false; + } +} + +// find a Z->ll decay on truth level +ROOT::VecOps::RVec AnalysisFCChh::getTruthZll( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids) { + // find the Zs + ROOT::VecOps::RVec z_ll_list; + + for (auto &truth_part : truth_particles) { + + if (isZ(truth_part)) { + if (checkZDecay(truth_part, daughter_ids, truth_particles) == + 1) { // check if is Zll decay + z_ll_list.push_back(truth_part); + } + } + } + + return z_ll_list; +} + +// helper functions for reco particles: +TLorentzVector +AnalysisFCChh::getTLV_reco(edm4hep::ReconstructedParticleData reco_part) { + TLorentzVector tlv; + tlv.SetXYZM(reco_part.momentum.x, reco_part.momentum.y, reco_part.momentum.z, + reco_part.mass); + return tlv; +} + +// build a MET four momentum: +TLorentzVector +AnalysisFCChh::getTLV_MET(edm4hep::ReconstructedParticleData met_object) { + TLorentzVector tlv; + float met_pt = sqrt(met_object.momentum.x * met_object.momentum.x + + met_object.momentum.y * met_object.momentum.y); + tlv.SetPxPyPzE(met_object.momentum.x, met_object.momentum.y, 0., met_pt); + + // debug: + // std::cout << "Set MET 4-vector with pT = " << tlv.Pt() << " px = " << + // tlv.Px() << " , py = " << tlv.Py() << " , pz = " << tlv.Pz() << " , E = " + // << tlv.E() << " and m = " << tlv.M() << std::endl; + + return tlv; +} + +// truth particles +TLorentzVector AnalysisFCChh::getTLV_MC(edm4hep::MCParticleData MC_part) { + TLorentzVector tlv; + tlv.SetXYZM(MC_part.momentum.x, MC_part.momentum.y, MC_part.momentum.z, + MC_part.mass); + return tlv; +} + +ROOT::VecOps::RVec +AnalysisFCChh::merge_pairs(ROOT::VecOps::RVec pairs) { + ROOT::VecOps::RVec merged_pairs; + + for (auto &pair : pairs) { + TLorentzVector pair_tlv = pair.merged_TLV(); + + // //build a edm4hep reco particle from the pair: + edm4hep::ReconstructedParticleData pair_particle; + pair_particle.momentum.x = pair_tlv.Px(); + pair_particle.momentum.y = pair_tlv.Py(); + pair_particle.momentum.z = pair_tlv.Pz(); + pair_particle.mass = pair_tlv.M(); + + merged_pairs.push_back(pair_particle); + } + + return merged_pairs; +} + +// select only the first pair in a vector (and retun as vector with size 1, +// format needed for the rdf) +ROOT::VecOps::RVec +AnalysisFCChh::get_first_pair(ROOT::VecOps::RVec pairs) { + ROOT::VecOps::RVec first_pair; + + if (pairs.size()) { + first_pair.push_back(pairs.at(0)); + } + + return first_pair; +} + +// split the pair again: return only the first particle or second particle in +// the pairs - needed for getting eg. pT etc of the selected DFOS pair +ROOT::VecOps::RVec +AnalysisFCChh::get_first_from_pair(ROOT::VecOps::RVec pairs) { + + ROOT::VecOps::RVec first_particle; + + if (pairs.size()) { + // sort by pT first: + pairs.at(0).sort_by_pT(); + first_particle.push_back(pairs.at(0).particle_1); + } + + return first_particle; +} + +ROOT::VecOps::RVec +AnalysisFCChh::get_second_from_pair( + ROOT::VecOps::RVec pairs) { + + ROOT::VecOps::RVec second_particle; + + if (pairs.size()) { + pairs.at(0).sort_by_pT(); + second_particle.push_back(pairs.at(0).particle_2); + } + + return second_particle; +} + +// count pairs +int AnalysisFCChh::get_n_pairs(ROOT::VecOps::RVec pairs) { + return pairs.size(); +} + +// correct function to get jets with a certain tag (can b-tag, c-tag) - check +// delphes card of sample for which taggers are used +ROOT::VecOps::RVec +AnalysisFCChh::getJet_tag(ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec values, int algoIndex) { + ROOT::VecOps::RVec result; + for (size_t i = 0; i < index.size(); ++i) { + auto v = + static_cast(values.at(pid.at(index.at(i)).parameters_begin)); + + result.push_back((v & (1u << algoIndex)) == (1u << algoIndex)); + } + return result; +} + +// return the list of c hadrons +ROOT::VecOps::RVec AnalysisFCChh::getChadron( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + + ROOT::VecOps::RVec c_had_list; + for (auto &truth_part : truth_particles) { + int num = int(abs(truth_part.PDG)); + if ((num > 400 && num < 500) || (num > 4000 && num < 5000)) { + c_had_list.push_back(truth_part); + } + } + return c_had_list; +} + +// return the list of b hadrons +ROOT::VecOps::RVec AnalysisFCChh::getBhadron( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + + ROOT::VecOps::RVec b_had_list; + for (auto &truth_part : truth_particles) { + int num = int(abs(truth_part.PDG)); + if ((num > 500 && num < 600) || (num > 5000 && num < 6000)) { + // int npos3, npos4; + // for(int i=0; i<3; i++){ + // npos3 = num%10; + // num = num/10; + // } + // num = int(abs(truth_part.PDG)); + // for(int i=0; i<4; i++){ + // npos4 = num%10; + // num = num/10; + // } + // if (npos3==5 || npos4==5){ + + // check also if from Higgs to count only from Higgs ones + // if ( !isFromHiggsDirect(truth_part, parent_ids, truth_particles) ){ + // continue; + // } + + b_had_list.push_back(truth_part); + } + } + + return b_had_list; +} + +// rewrite of functions to get tagged jets to work with updated edm4hep, +// https://github.com/key4hep/EDM4hep/pull/268 + +ROOT::VecOps::RVec +AnalysisFCChh::get_tagged_jets( + ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec jet_tags, + ROOT::VecOps::RVec jet_tags_indices, + ROOT::VecOps::RVec jet_tags_values, int algoIndex) { + + ROOT::VecOps::RVec tagged_jets; + + // make sure we have the right collections: every tag should have exactly one + // jet index + assert(jet_tags.size() == jet_tags_indices.size()); + + for (size_t jet_tags_i = 0; jet_tags_i < jet_tags.size(); ++jet_tags_i) { + + const auto tag = static_cast( + jet_tags_values[jet_tags[jet_tags_i].parameters_begin]); + + if (tag & (1 << algoIndex)) { + tagged_jets.push_back(jets[jet_tags_indices[jet_tags_i].index]); + } + } + return tagged_jets; +} + +// return the full jets rather than the list of tags +// ROOT::VecOps::RVec +// AnalysisFCChh::get_tagged_jets(ROOT::VecOps::RVec +// jets, ROOT::VecOps::RVec index, +// ROOT::VecOps::RVec pid, ROOT::VecOps::RVec +// tag_values, int algoIndex){ + +// ROOT::VecOps::RVec tagged_jets; + +// // std::cout << "running AnalysisFCChh::get_tagged_jets() on jet +// collection of size" << jets.size() << std::endl; + +// for (size_t iJet = 0; iJet < jets.size(); ++iJet){ +// // for (auto & jet : jets){ +// // std::cout << jet.particles_begin << " to " << +// jet.particles_end << std::endl; +// // get the jet particle id index for the jet +// const auto jetIDIndex = index[jets[iJet].particles_begin]; +// // std::cout << "jet index = " << jetIDIndex << std::endl; +// const auto jetID = pid[jetIDIndex]; +// // get the tag value +// const auto tag = +// static_cast(tag_values[jetID.parameters_begin]); +// // std::cout << "Tag = " << tag << std::endl; +// // check if the tag satisfies what we want +// if (tag & (1 << algoIndex)) { +// tagged_jets.push_back(jets[iJet]); +// } +// } + +// return tagged_jets; +// } + +// same for tau tags: they are second entry in the +ROOT::VecOps::RVec +AnalysisFCChh::get_tau_jets( + ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec tag_values, int algoIndex) { + + ROOT::VecOps::RVec tagged_jets; + + // std::cout << "running AnalysisFCChh::get_tagged_jets() on jet collection of + // size" << jets.size() << std::endl; + + for (size_t iJet = 0; iJet < jets.size(); ++iJet) { + // for (auto & jet : jets){ + // std::cout << jet.particleIDs_begin << " to " << jet.particleIDs_end << + // std::endl; get the jet particle id index for the jet + const auto jetIDIndex = index[jets[iJet].particles_begin]; + // std::cout << "jet index = " << jetIDIndex << std::endl; + const auto jetID = pid[jetIDIndex]; + // get the tag value + const auto tag = + static_cast(tag_values[jetID.parameters_end - 1]); + // std::cout << "Tag = " << tag << std::endl; + // check if the tag satisfies what we want + if (tag & (1 << algoIndex)) { + tagged_jets.push_back(jets[iJet]); + } + } + + return tagged_jets; +} + +// complementary: return the all jets that do not have the requested tag +ROOT::VecOps::RVec +AnalysisFCChh::get_untagged_jets( + ROOT::VecOps::RVec jets, + ROOT::VecOps::RVec index, + ROOT::VecOps::RVec pid, + ROOT::VecOps::RVec tag_values, int algoIndex) { + + ROOT::VecOps::RVec untagged_jets; + + // std::cout << "running AnalysisFCChh::get_tagged_jets() on jet collection of + // size" << jets.size() << std::endl; + + for (size_t iJet = 0; iJet < jets.size(); ++iJet) { + // for (auto & jet : jets){ + // std::cout << jet.particleIDs_begin << " to " << jet.particleIDs_end << + // std::endl; get the jet particle id index for the jet + const auto jetIDIndex = index[jets[iJet].particles_begin]; + // std::cout << "jet index = " << jetIDIndex << std::endl; + const auto jetID = pid[jetIDIndex]; + // get the tag value + const auto tag = static_cast(tag_values[jetID.parameters_begin]); + // std::cout << "Tag = " << tag << std::endl; + // check if the tag satisfies what we want + if (!(tag & (1 << algoIndex))) { + untagged_jets.push_back(jets[iJet]); + } + } + + return untagged_jets; +} + +// select objects that are isolated from the other objects with given dR +// threshold +ROOT::VecOps::RVec +AnalysisFCChh::sel_isolated( + ROOT::VecOps::RVec sel_parts, + ROOT::VecOps::RVec check_parts, + float dR_thres) { + + ROOT::VecOps::RVec isolated_parts; + + for (auto &sel_part : sel_parts) { + bool is_isolated = true; + TLorentzVector sel_part_tlv = getTLV_reco(sel_part); + // std::cout << "TLV found with pT() =" << sel_part_tlv.Pt() << std::endl; + + // loop over all particles to check against and see if any are within the dR + // threshold + for (auto &check_part : check_parts) { + TLorentzVector check_part_tlv = getTLV_reco(check_part); + float dR_val = sel_part_tlv.DeltaR(check_part_tlv); + + if (dR_val <= dR_thres) { + is_isolated = false; + } + + check_part_tlv.Clear(); + } + + sel_part_tlv.Clear(); + + if (is_isolated) { + isolated_parts.push_back(sel_part); + } + } + + return isolated_parts; +} + +// find the lepton pair that likely originates from a Z decay: +ROOT::VecOps::RVec AnalysisFCChh::getOSPairs( + ROOT::VecOps::RVec leptons_in) { + + ROOT::VecOps::RVec OS_pairs; + + // need at least 2 leptons in the input + if (leptons_in.size() < 2) { + return OS_pairs; + } + + // separate the leptons by charges + ROOT::VecOps::RVec leptons_pos; + ROOT::VecOps::RVec leptons_neg; + + for (auto &lep : leptons_in) { + auto charge = lep.charge; + if (charge > 0) { + leptons_pos.push_back(lep); + } else if (charge < 0) { + leptons_neg.push_back(lep); + } + + else { + std::cout << "Error in function AnalysisFCChh::getOSPair() - found " + "neutral particle! Function is supposed to be used for " + "electrons or muons only." + << std::endl; + } + } + + // std::cout << "Found leptons: " << leptons_pos.size() << " pos, " << + // leptons_neg.size() << " neg." << std::endl; + + // check charges: if don't have one of each, cannot build OS pair and if is + // only one of each there is no ambiguity + if (leptons_pos.size() < 1 || leptons_neg.size() < 1) { + return OS_pairs; + } + + // std::cout << "Have enough leptons to build pair!" << std::endl; + + // build all possible pairs + // TLorentzVector OS_pair_tlv; + + for (auto &lep_pos : leptons_pos) { + // sum up the momentum components to get the TLV of the OS pair: first the + // positive one + // TLorentzVector lep_pos_tlv = getTLV_reco(lep_pos); + + for (auto &lep_neg : leptons_neg) { + // TLorentzVector lep_neg_tlv = getTLV_reco(lep_neg); + // TLorentzVector OS_pair_tlv = lep_pos_tlv+lep_neg_tlv; + + // //build a edm4hep rco particle from the os pair: + // edm4hep::ReconstructedParticleData OS_pair; + // OS_pair.momentum.x = OS_pair_tlv.Px(); + // OS_pair.momentum.y = OS_pair_tlv.Py(); + // OS_pair.momentum.z = OS_pair_tlv.Pz(); + // OS_pair.mass = OS_pair_tlv.M(); + + // new code: do not merge the pair but store them separately + RecoParticlePair OS_pair; + OS_pair.particle_1 = lep_pos; + OS_pair.particle_2 = lep_neg; + + OS_pairs.push_back(OS_pair); + } + } + + // FOR DEBUG? + // if (OS_pairs.size() > 1){ + // std::cout << "Number of possible OS pairs: " << OS_pairs.size() << + // std::endl; std::cout << "Build from: " << leptons_pos.size() << " + // pos, " + // << leptons_neg.size() << " neg." << std::endl; + // } + + return OS_pairs; +} + +// pick the pair that is closest to Z mass: +ROOT::VecOps::RVec AnalysisFCChh::getBestOSPair( + ROOT::VecOps::RVec electron_pairs, + ROOT::VecOps::RVec muon_pairs) { + + ROOT::VecOps::RVec best_pair; + + // std::cout << "N_elec_pairs = " << electron_pairs.size() << ", N_muon_pairs + // = " << muon_pairs.size() << std::endl; + + // check if any pairs in input: + if (electron_pairs.size() == 0 && muon_pairs.size() == 0) { + return best_pair; + } + + // if only one pair in input, return that one: + else if (electron_pairs.size() == 1 && muon_pairs.size() == 0) { + best_pair.push_back(electron_pairs.at(0)); + return best_pair; + } + + else if (electron_pairs.size() == 0 && muon_pairs.size() == 1) { + best_pair.push_back(muon_pairs.at(0)); + return best_pair; + } + + // if there are mor options, pick the one that is closest to Z mass + + const double Z_mass = 91.1876; + + // make a vector with both electron and muons pairs in it: + ROOT::VecOps::RVec all_pairs; + for (auto &elec_pair : electron_pairs) { + all_pairs.push_back(elec_pair); + } + for (auto &muon_pair : muon_pairs) { + all_pairs.push_back(muon_pair); + } + + // from Clement's main code: use std::sort on the mass difference + auto resonancesort = [&](RecoParticlePair i, RecoParticlePair j) { + return (abs(Z_mass - i.merged_TLV().M()) < + abs(Z_mass - j.merged_TLV().M())); + }; + // auto resonancesort = [&] (edm4hep::ReconstructedParticleData i + // ,edm4hep::ReconstructedParticleData j) { return (abs( Z_mass + // -i.mass) AnalysisFCChh::getLeadingPair( + ROOT::VecOps::RVec electron_pairs, + ROOT::VecOps::RVec muon_pairs) { + + ROOT::VecOps::RVec best_pair; + + // std::cout << "N_elec_pairs = " << electron_pairs.size() << ", N_muon_pairs + // = " << muon_pairs.size() << std::endl; + + // check if any pairs in input: + if (electron_pairs.size() == 0 && muon_pairs.size() == 0) { + return best_pair; + } + + // if only one pair in input, return that one: + else if (electron_pairs.size() == 1 && muon_pairs.size() == 0) { + best_pair.push_back(electron_pairs.at(0)); + return best_pair; + } + + else if (electron_pairs.size() == 0 && muon_pairs.size() == 1) { + best_pair.push_back(muon_pairs.at(0)); + return best_pair; + } + + // have at least one of each + // make a vector with both electron and muons pairs in it: + ROOT::VecOps::RVec all_pairs; + for (auto &elec_pair : electron_pairs) { + all_pairs.push_back(elec_pair); + } + for (auto &muon_pair : muon_pairs) { + all_pairs.push_back(muon_pair); + } + + // take the combined pT to sort + auto pTll_sort = [&](RecoParticlePair i, RecoParticlePair j) { + return (abs(i.merged_TLV().Pt()) > abs(j.merged_TLV().Pt())); + }; + std::sort(all_pairs.begin(), all_pairs.end(), pTll_sort); + + best_pair.push_back(all_pairs.at(0)); + + return best_pair; +} + +// build all possible emu OS combinations, for eg tautau and ww analysis +ROOT::VecOps::RVec AnalysisFCChh::getDFOSPairs( + ROOT::VecOps::RVec electrons_in, + ROOT::VecOps::RVec muons_in) { + + ROOT::VecOps::RVec DFOS_pairs; + + // need at least 2 leptons in the input + if (electrons_in.size() < 1 || muons_in.size() < 1) { + return DFOS_pairs; + } + + // sort the vectors by size, so that the first pair is always the leading + auto sort_by_pT = [&](edm4hep::ReconstructedParticleData part_i, + edm4hep::ReconstructedParticleData part_j) { + return (getTLV_reco(part_i).Pt() > getTLV_reco(part_j).Pt()); + }; + std::sort(electrons_in.begin(), electrons_in.end(), sort_by_pT); + std::sort(muons_in.begin(), muons_in.end(), sort_by_pT); + + // loop over the electrons and make a pair if a muons with opposite charge is + // found + + for (auto &elec : electrons_in) { + for (auto &muon : muons_in) { + auto total_charge = elec.charge + muon.charge; + if (total_charge == 0) { + // std::cout << "found DFOS pair!" << std::endl; + RecoParticlePair DFOS_pair; + DFOS_pair.particle_1 = elec; + DFOS_pair.particle_2 = muon; + + DFOS_pairs.push_back(DFOS_pair); + } + } + } + + // debug + // if (DFOS_pairs.size() > 1){ + // std::cout << "Number of possible DFOS pairs: " << DFOS_pairs.size() << + // std::endl; std::cout << "Build from: " << electrons_in.size() << " + // electrons, " << muons_in.size() << " muons" << std::endl; + // } + + return DFOS_pairs; +} + +// SortParticleCollection +// +ROOT::VecOps::RVec +AnalysisFCChh::SortParticleCollection( + ROOT::VecOps::RVec particles_in) { + if (particles_in.size() < 2) { + return particles_in; + } else { + auto sort_by_pT = [&](edm4hep::ReconstructedParticleData part_i, + edm4hep::ReconstructedParticleData part_j) { + return (getTLV_reco(part_i).Pt() > getTLV_reco(part_j).Pt()); + }; + std::sort(particles_in.begin(), particles_in.end(), sort_by_pT); + return particles_in; + } +} +// build all pairs from the input particles -> this returns the pair made of pT +// leading particles!!! +ROOT::VecOps::RVec AnalysisFCChh::getPairs( + ROOT::VecOps::RVec particles_in) { + + ROOT::VecOps::RVec pairs; + + // need at least 2 particles in the input + if (particles_in.size() < 2) { + return pairs; + } + + // else sort them by pT, and take the only the leading pair + else { + auto sort_by_pT = [&](edm4hep::ReconstructedParticleData part_i, + edm4hep::ReconstructedParticleData part_j) { + return (getTLV_reco(part_i).Pt() > getTLV_reco(part_j).Pt()); + }; + std::sort(particles_in.begin(), particles_in.end(), sort_by_pT); + + // old method + // TLorentzVector tlv_1 = getTLV_reco(particles_in.at(0)); + // TLorentzVector tlv_2 = getTLV_reco(particles_in.at(1)); + + // TLorentzVector tlv_pair = tlv_1+tlv_2; + + // edm4hep::ReconstructedParticleData pair; + // pair.momentum.x = tlv_pair.Px(); + // pair.momentum.y = tlv_pair.Py(); + // pair.momentum.z = tlv_pair.Pz(); + // pair.mass = tlv_pair.M(); + + // new method, dont merge the pair + RecoParticlePair pair; + pair.particle_1 = particles_in.at(0); + pair.particle_2 = particles_in.at(1); + + pairs.push_back(pair); + } + + return pairs; +} + +// same for MC particle +ROOT::VecOps::RVec AnalysisFCChh::getPairs( + ROOT::VecOps::RVec particles_in) { + + ROOT::VecOps::RVec pairs; + + // need at least 2 particles in the input + if (particles_in.size() < 2) { + return pairs; + } + + // else sort them by pT, and take the only the leading pair + else { + auto sort_by_pT = [&](edm4hep::MCParticleData part_i, + edm4hep::MCParticleData part_j) { + return (getTLV_MC(part_i).Pt() > getTLV_MC(part_j).Pt()); + }; + std::sort(particles_in.begin(), particles_in.end(), sort_by_pT); + + // new method, dont merge the pair + MCParticlePair pair; + pair.particle_1 = particles_in.at(0); + pair.particle_2 = particles_in.at(1); + + pairs.push_back(pair); + } + + return pairs; +} + +// make the subleading pair, ie. from particles 3 and 4 in pT order +ROOT::VecOps::RVec AnalysisFCChh::getPair_sublead( + ROOT::VecOps::RVec particles_in) { + + ROOT::VecOps::RVec pairs; + + // need at least 2 particles in the input + if (particles_in.size() < 4) { + return pairs; + } + + // else sort them by pT, and take the only the subleading pair + else { + auto sort_by_pT = [&](edm4hep::ReconstructedParticleData part_i, + edm4hep::ReconstructedParticleData part_j) { + return (getTLV_reco(part_i).Pt() > getTLV_reco(part_j).Pt()); + }; + std::sort(particles_in.begin(), particles_in.end(), sort_by_pT); + + // new method, dont merge the pair + RecoParticlePair pair; + pair.particle_1 = particles_in.at(2); + pair.particle_2 = particles_in.at(3); + + pairs.push_back(pair); + } + + return pairs; +} + +// calculate the transverse mass ob two objects: massless approximation? +ROOT::VecOps::RVec AnalysisFCChh::get_mT( + ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj) { + + ROOT::VecOps::RVec mT_vector; + + // if one of the input particles is empty, just fill a default value of -999 + // as mT + if (Z_ll_pair.size() < 1 || MET_obj.size() < 1) { + mT_vector.push_back(-999.); + return mT_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + auto Z_ll = Z_ll_pair.at(0); + auto MET = MET_obj.at(0); + + // Z_ll is fully reconstructed and regular 4 vector + TLorentzVector tlv_Zll = getTLV_reco(Z_ll); + float pT_ll = tlv_Zll.Pt(); + TVector3 vec_pT_ll; + vec_pT_ll.SetXYZ(Z_ll.momentum.x, Z_ll.momentum.y, 0.); + + // for MET take the components separately: absolute MET pt and the x and y + // component in a vector: + TLorentzVector tlv_met = getTLV_MET(MET); + float pT_met = tlv_met.Pt(); + TVector3 vec_pT_met; + vec_pT_met.SetXYZ(MET.momentum.x, MET.momentum.y, 0.); + + float mT = sqrt(2. * pT_ll * pT_met * + (1 - cos(abs(vec_pT_ll.DeltaPhi(vec_pT_met))))); + + mT_vector.push_back(mT); + + // std::cout << "Debug mT: mT with old func = " << mT << std::endl; + + return mT_vector; +} + +// different definition -> in tests it agreed 100% with previous def, keep for +// reference +ROOT::VecOps::RVec AnalysisFCChh::get_mT_new( + ROOT::VecOps::RVec vis_part, + ROOT::VecOps::RVec MET_obj) { + + ROOT::VecOps::RVec mT_vector; + + // if one of the input particles is empty, just fill a default value of -999 + // as mT + if (vis_part.size() < 1 || MET_obj.size() < 1) { + mT_vector.push_back(-999.); + return mT_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + auto visible_particle = vis_part.at(0); + auto MET = MET_obj.at(0); + + TLorentzVector tlv_vis = getTLV_reco(visible_particle); + float pT_vis = tlv_vis.Pt(); + TVector3 vec_pT_vis; + vec_pT_vis.SetXYZ(visible_particle.momentum.x, visible_particle.momentum.y, + 0.); + + // for MET take the components separately: absolute MET pt and the x and y + // component in a vector: + TLorentzVector tlv_met = getTLV_MET(MET); + float pT_met = tlv_met.Pt(); + TVector3 vec_pT_met; + vec_pT_met.SetXYZ(MET.momentum.x, MET.momentum.y, 0.); + + float mt_term1 = (pT_vis + pT_met) * (pT_vis + pT_met); + float mt_term2 = (vec_pT_vis + vec_pT_met).Mag2(); + + float mT = sqrt(mt_term1 - mt_term2); + + mT_vector.push_back(mT); + + // std::cout << "Debug mT: mT with new func = " << mT << std::endl; + + return mT_vector; +} + +// pseudo-invariant mass - see CMS paper, PHYS. REV. D 102, 032003 (2020) +ROOT::VecOps::RVec AnalysisFCChh::get_m_pseudo( + ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj) { + + ROOT::VecOps::RVec m_pseudo_vector; + + // if one of the input particles is empty, just fill a default value of -999 + // as mT + if (Z_ll_pair.size() < 1 || MET_obj.size() < 1) { + m_pseudo_vector.push_back(-999.); + return m_pseudo_vector; + } + + TLorentzVector tlv_Zll = getTLV_reco(Z_ll_pair.at(0)); + TLorentzVector tlv_MET = getTLV_MET(MET_obj.at(0)); + + TLorentzVector tlv_H_pseudo = tlv_Zll + tlv_MET; + + m_pseudo_vector.push_back(tlv_H_pseudo.M()); + + return m_pseudo_vector; +} + +// pseudo-transverse mass - see CMS paper, PHYS. REV. D 102, 032003 (2020) +ROOT::VecOps::RVec AnalysisFCChh::get_mT_pseudo( + ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj) { + + ROOT::VecOps::RVec m_pseudo_vector; + + // if one of the input particles is empty, just fill a default value of -999 + // as mT + if (Z_ll_pair.size() < 1 || MET_obj.size() < 1) { + m_pseudo_vector.push_back(-999.); + return m_pseudo_vector; + } + + TLorentzVector tlv_Zll = getTLV_reco(Z_ll_pair.at(0)); + TLorentzVector tlv_MET = getTLV_MET(MET_obj.at(0)); + + TLorentzVector tlv_H_pseudo = tlv_Zll + tlv_MET; + + m_pseudo_vector.push_back(sqrt(tlv_H_pseudo.E() * tlv_H_pseudo.E() - + tlv_H_pseudo.Pz() * tlv_H_pseudo.Pz())); + + return m_pseudo_vector; +} + +// try the stransverse mass as defined in arXiv:1411.4312 +// ROOT::VecOps::RVec +// AnalysisFCChh::get_mT2(ROOT::VecOps::RVec +// particle_1, ROOT::VecOps::RVec +// particle_2, ROOT::VecOps::RVec MET_obj){ + +// asymm_mt2_lester_bisect::disableCopyrightMessage(); + +// ROOT::VecOps::RVec m_strans_vector; + +// //if one of the input particles is empty, just fill a default value of +// -999 as mT if (particle_1.size() < 1 || particle_2.size() < 1|| +// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); +// return m_strans_vector; +// } + +// TLorentzVector tlv_vis1 = getTLV_reco(particle_1.at(0)); +// TLorentzVector tlv_vis2 = getTLV_reco(particle_2.at(0)); +// TLorentzVector tlv_met = getTLV_MET(MET_obj.at(0)); + +// // std::cout << "Part1 : Compare TLV w. direct for px:" << tlv_vis1.Px() +// << " vs " << particle_1.at(0).momentum.x << std::endl; +// // std::cout << "Part1 : Compare TLV w. direct for py:" << tlv_vis1.Py() +// << " vs " << particle_1.at(0).momentum.y << std::endl; + +// // std::cout << "Part2 : Compare TLV w. direct for px:" << tlv_vis2.Px() +// << " vs " << particle_2.at(0).momentum.x << std::endl; +// // std::cout << "Part2 : Compare TLV w. direct for px:" << tlv_vis2.Py() +// << " vs " << particle_2.at(0).momentum.y << std::endl; + +// // std::cout << "MET : Compare TLV w. direct for px:" << tlv_met.Px() << +// " vs " << MET_obj.at(0).momentum.x << std::endl; +// // std::cout << "MET : Compare TLV w. direct for px:" << tlv_met.Py() << +// " vs " << MET_obj.at(0).momentum.y << std::endl; + +// double MT2 = asymm_mt2_lester_bisect::get_mT2( +// tlv_vis1.M(), tlv_vis1.Px(), tlv_vis1.Py(), +// tlv_vis2.M(), tlv_vis2.Px(), tlv_vis2.Py(), +// tlv_met.Px(), tlv_met.Py(), +// 0., 0.); + +// // define our inputs: + +// m_strans_vector.push_back(MT2); + +// return m_strans_vector; +// } + +// // stransverse mass but fixing the two visible to higgs masses +// ROOT::VecOps::RVec +// AnalysisFCChh::get_mT2_125(ROOT::VecOps::RVec +// particle_1, ROOT::VecOps::RVec +// particle_2, ROOT::VecOps::RVec MET_obj){ + +// asymm_mt2_lester_bisect::disableCopyrightMessage(); + +// ROOT::VecOps::RVec m_strans_vector; + +// //if one of the input particles is empty, just fill a default value of +// -999 as mT if (particle_1.size() < 1 || particle_2.size() < 1|| +// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); +// return m_strans_vector; +// } + +// TLorentzVector tlv_vis1 = getTLV_reco(particle_1.at(0)); +// TLorentzVector tlv_vis2 = getTLV_reco(particle_2.at(0)); +// TLorentzVector tlv_met = getTLV_MET(MET_obj.at(0)); + +// double MT2 = asymm_mt2_lester_bisect::get_mT2( +// 125., tlv_vis1.Px(), tlv_vis1.Py(), +// 125., tlv_vis2.Px(), tlv_vis2.Py(), +// tlv_met.Px(), tlv_met.Py(), +// 0., 0.); + +// // define our inputs: + +// m_strans_vector.push_back(MT2); + +// return m_strans_vector; +// } + +// HT2 variable as in ATLAS bblvlv analysis +ROOT::VecOps::RVec AnalysisFCChh::get_HT2( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2) { + + ROOT::VecOps::RVec HT2_vector; + + if (particle_1.size() < 1 || particle_2.size() < 1) { + HT2_vector.push_back(-999.); + return HT2_vector; + } + + TLorentzVector tlv_1 = getTLV_reco(particle_1.at(0)); + TLorentzVector tlv_2 = getTLV_reco(particle_2.at(0)); + + // scalar sum + float HT2 = tlv_1.Pt() + tlv_2.Pt(); + HT2_vector.push_back(HT2); + return HT2_vector; +} + +// HT_w_inv = scalar sum of all pT from objects of a HH->bblvlv decay as used in +// ATLAS paper for the HT ratio +ROOT::VecOps::RVec AnalysisFCChh::get_HT_wInv( + ROOT::VecOps::RVec MET, + ROOT::VecOps::RVec ll_pair, + ROOT::VecOps::RVec bb_pair) { + + ROOT::VecOps::RVec HT_wInv_vector; + + if (MET.size() < 1 || ll_pair.size() < 1 || bb_pair.size() < 1) { + HT_wInv_vector.push_back(-999.); + return HT_wInv_vector; + } + + // if all objects are there, get the first entry in vector always (should be + // leading) and take the pTs + float MET_pT = getTLV_MET(MET.at(0)).Pt(); + + float lep1_pT = getTLV_reco(ll_pair.at(0).particle_1).Pt(); + float lep2_pT = getTLV_reco(ll_pair.at(0).particle_2).Pt(); + + float b1_pT = getTLV_reco(bb_pair.at(0).particle_1).Pt(); + float b2_pT = getTLV_reco(bb_pair.at(0).particle_2).Pt(); + + // and sum .. + float HT_w_inv = MET_pT + lep1_pT + lep2_pT + b1_pT + b2_pT; + HT_wInv_vector.push_back(HT_w_inv); + return HT_wInv_vector; +} + +// get the true HT = scalar sum of only the visible objects, here the bs and the +// leptons (true HT in contrast to the HT with the MET) +ROOT::VecOps::RVec +AnalysisFCChh::get_HT_true(ROOT::VecOps::RVec ll_pair, + ROOT::VecOps::RVec bb_pair) { + + ROOT::VecOps::RVec HT_wInv_vector; + + if (ll_pair.size() < 1 || bb_pair.size() < 1) { + HT_wInv_vector.push_back(-999.); + return HT_wInv_vector; + } + + // if all objects are there, get the first entry in vector always (should be + // leading) and take the pTs + float lep1_pT = getTLV_reco(ll_pair.at(0).particle_1).Pt(); + float lep2_pT = getTLV_reco(ll_pair.at(0).particle_2).Pt(); + + float b1_pT = getTLV_reco(bb_pair.at(0).particle_1).Pt(); + float b2_pT = getTLV_reco(bb_pair.at(0).particle_2).Pt(); + + // and sum .. + float HT_w_inv = lep1_pT + lep2_pT + b1_pT + b2_pT; + HT_wInv_vector.push_back(HT_w_inv); + return HT_wInv_vector; +} + +// construct ratio of HT2 and HT_w_inv +ROOT::VecOps::RVec +AnalysisFCChh::get_HT2_ratio(ROOT::VecOps::RVec HT2, + ROOT::VecOps::RVec HT_wInv) { + + ROOT::VecOps::RVec HT2_ratio_vector; + + if (HT2.size() < 1 || HT_wInv.size() < 1) { + HT2_ratio_vector.push_back(-999.); + return HT2_ratio_vector; + } + + float HT2_ratio = HT2.at(0) / HT_wInv.at(0); + HT2_ratio_vector.push_back(HT2_ratio); + return HT2_ratio_vector; +} + +// construct met signifcance as ratio of MET pt and true HT +ROOT::VecOps::RVec AnalysisFCChh::get_MET_significance( + ROOT::VecOps::RVec MET, + ROOT::VecOps::RVec HT_true, bool doSqrt) { + + ROOT::VecOps::RVec MET_sig_vector; + + if (MET.size() < 1 || HT_true.size() < 1) { + MET_sig_vector.push_back(-999.); + return MET_sig_vector; + } + + float MET_pt = getTLV_MET(MET.at(0)).Pt(); + + if (doSqrt) { + MET_sig_vector.push_back(MET_pt / sqrt(HT_true.at(0))); + } else { + MET_sig_vector.push_back(MET_pt / HT_true.at(0)); + } + + return MET_sig_vector; +} + +// helper function which merges two particles into one using the TLVs +ROOT::VecOps::RVec +AnalysisFCChh::merge_parts_TLVs( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2) { + ROOT::VecOps::RVec out_vector; + + // if one of the input particles is empty, return an empty vector + if (particle_1.size() < 1 || particle_2.size() < 1) { + // std::cout << "Warning in AnalysisFCChh::merge_parts_TLVs - one input + // vector is empty, returning an empty vector." << std::endl; + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_reco(particle_1.at(0)); + TLorentzVector tlv_2 = getTLV_reco(particle_2.at(0)); + + TLorentzVector tlv_merged = tlv_1 + tlv_2; + + edm4hep::ReconstructedParticleData particle_merged; + particle_merged.momentum.x = tlv_merged.Px(); + particle_merged.momentum.y = tlv_merged.Py(); + particle_merged.momentum.z = tlv_merged.Pz(); + particle_merged.mass = tlv_merged.M(); + + out_vector.push_back(particle_merged); + + return out_vector; +} + +// same as above, overloaded for MCParticles +ROOT::VecOps::RVec AnalysisFCChh::merge_parts_TLVs( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2) { + ROOT::VecOps::RVec out_vector; + + // if one of the input particles is empty, return an empty vector + if (particle_1.size() < 1 || particle_2.size() < 1) { + // std::cout << "Warning in AnalysisFCChh::merge_parts_TLVs - one input + // vector is empty, returning an empty vector." << std::endl; + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_MC(particle_1.at(0)); + TLorentzVector tlv_2 = getTLV_MC(particle_2.at(0)); + + TLorentzVector tlv_merged = tlv_1 + tlv_2; + + edm4hep::MCParticleData particle_merged; + particle_merged.momentum.x = tlv_merged.Px(); + particle_merged.momentum.y = tlv_merged.Py(); + particle_merged.momentum.z = tlv_merged.Pz(); + particle_merged.mass = tlv_merged.M(); + + out_vector.push_back(particle_merged); + + return out_vector; +} + +// combine one lepton with one b-jet each, in case of ttbar events this should +// reconstruct the visible top + +// find lb pairs with smallest average +ROOT::VecOps::RVec +AnalysisFCChh::make_lb_pairing(ROOT::VecOps::RVec lepton_pair, + ROOT::VecOps::RVec bb_pair) { + + ROOT::VecOps::RVec out_vector; + RecoParticlePair lb_pair_1; + RecoParticlePair lb_pair_2; + + // if one of the input particles is empty, return an empty vector + if (lepton_pair.size() < 1 || bb_pair.size() < 1) { + return out_vector; + } + + // take the separate particles + TLorentzVector tlv_lepton_1 = getTLV_reco(lepton_pair.at(0).particle_1); + TLorentzVector tlv_lepton_2 = getTLV_reco(lepton_pair.at(0).particle_2); + + TLorentzVector tlv_bjet_1 = getTLV_reco(bb_pair.at(0).particle_1); + TLorentzVector tlv_bjet_2 = getTLV_reco(bb_pair.at(0).particle_2); + + // then make the two possible combinations: + TLorentzVector tlv_l1_b1 = tlv_lepton_1 + tlv_bjet_1; + TLorentzVector tlv_l2_b2 = tlv_lepton_2 + tlv_bjet_2; + + TLorentzVector tlv_l1_b2 = tlv_lepton_1 + tlv_bjet_2; + TLorentzVector tlv_l2_b1 = tlv_lepton_2 + tlv_bjet_1; + + // calculate the average invariant masses for the two combinations: + float mlb_comb1 = (tlv_l1_b1.M() + tlv_l2_b2.M()) / 2.; + float mlb_comb2 = (tlv_l1_b2.M() + tlv_l2_b1.M()) / 2.; + + // std::cout << "Mlb_comb1: " << mlb_comb1 << std::endl; + // std::cout << "Mlb_comb2: " << mlb_comb2 << std::endl; + + // the combination with minimum mlb is the one we pick + if (mlb_comb1 < mlb_comb2) { + lb_pair_1.particle_1 = lepton_pair.at(0).particle_1; + lb_pair_1.particle_2 = bb_pair.at(0).particle_1; + + lb_pair_2.particle_1 = lepton_pair.at(0).particle_2; + lb_pair_2.particle_2 = bb_pair.at(0).particle_2; + + } + + else { + + lb_pair_1.particle_1 = lepton_pair.at(0).particle_1; + lb_pair_1.particle_2 = bb_pair.at(0).particle_2; + + lb_pair_2.particle_1 = lepton_pair.at(0).particle_2; + lb_pair_2.particle_2 = bb_pair.at(0).particle_1; + } + + out_vector.push_back(lb_pair_1); + out_vector.push_back(lb_pair_2); + + return out_vector; +} + +// rather inefficienct, but now get the actually value of mlb again +ROOT::VecOps::RVec +AnalysisFCChh::get_mlb_reco(ROOT::VecOps::RVec lb_pairs) { + + ROOT::VecOps::RVec out_vector; + + // there should be two pairs + if (lb_pairs.size() < 2) { + return out_vector; + } + + TLorentzVector tlv_pair1 = lb_pairs.at(0).merged_TLV(); + TLorentzVector tlv_pair2 = lb_pairs.at(1).merged_TLV(); + + float mlb_reco = (tlv_pair1.M() + tlv_pair2.M()) / 2.; + + out_vector.push_back(mlb_reco); + // std::cout << "Mlb : " << mlb_reco << std::endl; + + return out_vector; +} + +// do trhe same thing and also add met into it +ROOT::VecOps::RVec AnalysisFCChh::get_mlb_MET_reco( + ROOT::VecOps::RVec lb_pairs, + ROOT::VecOps::RVec MET) { + + ROOT::VecOps::RVec out_vector; + + // there should be two pairs and one met + if (lb_pairs.size() < 2 || MET.size() < 1) { + return out_vector; + } + + TLorentzVector tlv_pair1 = lb_pairs.at(0).merged_TLV(); + TLorentzVector tlv_pair2 = lb_pairs.at(1).merged_TLV(); + TLorentzVector tlv_MET = getTLV_MET(MET.at(0)); + + float mlb_reco = (tlv_pair1 + tlv_pair2 + tlv_MET).M() / 2.; + + out_vector.push_back(mlb_reco); + // std::cout << "Mlb : " << mlb_reco << std::endl; + + return out_vector; +} + +// calculate the pzetas, following CMS tautau analyses: +// https://github.com/cardinia/DesyTauAnalysesUL/blob/dev/Common/interface/functions.h#L792-L841 + +ROOT::VecOps::RVec +AnalysisFCChh::get_pzeta_vis(ROOT::VecOps::RVec lepton_pair) { + + ROOT::VecOps::RVec out_vector; + + // there should be one lepton pair + if (lepton_pair.size() < 1) { + return out_vector; + } + + // get the tlvs of the leptons: + TLorentzVector tlv_lepton_1 = getTLV_reco(lepton_pair.at(0).particle_1); + TLorentzVector tlv_lepton_2 = getTLV_reco(lepton_pair.at(0).particle_2); + + // normalize the pT vectors of the leptons to their magnitudes -> make unit + // vectors, split in x and y components + float vec_unit_lep1_x = tlv_lepton_1.Px() / tlv_lepton_1.Pt(); + float vec_unit_lep1_y = tlv_lepton_1.Py() / tlv_lepton_1.Pt(); + + float vec_unit_lep2_x = tlv_lepton_2.Px() / tlv_lepton_2.Pt(); + float vec_unit_lep2_y = tlv_lepton_2.Py() / tlv_lepton_2.Pt(); + + // the sum of the two unit vectors is the bisector + float zx = vec_unit_lep1_x + vec_unit_lep2_x; + float zy = vec_unit_lep1_y + vec_unit_lep2_y; + + // normalize with magnitude again? + float modz = sqrt(zx * zx + zy * zy); + zx = zx / modz; + zy = zy / modz; + + // build the projection of pTll onto this bisector + float vis_ll_x = tlv_lepton_1.Px() + tlv_lepton_2.Px(); + float vis_ll_y = tlv_lepton_1.Py() + tlv_lepton_2.Py(); + + float pzeta_vis = zx * vis_ll_x + zy * vis_ll_y; + + out_vector.push_back(pzeta_vis); + + // std::cout << "pzeta_vis = " << pzeta_vis << std::endl; + + return out_vector; +} + +// for pzeta_miss do the same for bisector, but then project etmiss onot it: +ROOT::VecOps::RVec AnalysisFCChh::get_pzeta_miss( + ROOT::VecOps::RVec lepton_pair, + ROOT::VecOps::RVec MET) { + + ROOT::VecOps::RVec out_vector; + + // there should be one lepton pair and one MET + if (lepton_pair.size() < 1 || MET.size() < 1) { + return out_vector; + } + + // get the tlvs of the leptons: + TLorentzVector tlv_lepton_1 = getTLV_reco(lepton_pair.at(0).particle_1); + TLorentzVector tlv_lepton_2 = getTLV_reco(lepton_pair.at(0).particle_2); + TLorentzVector tlv_MET = getTLV_MET(MET.at(0)); + + // normalize the pT vectors of the leptons to their magnitudes -> make unit + // vectors, split in x and y components + float vec_unit_lep1_x = tlv_lepton_1.Px() / tlv_lepton_1.Pt(); + float vec_unit_lep1_y = tlv_lepton_1.Py() / tlv_lepton_1.Pt(); + + float vec_unit_lep2_x = tlv_lepton_2.Px() / tlv_lepton_2.Pt(); + float vec_unit_lep2_y = tlv_lepton_2.Py() / tlv_lepton_2.Pt(); + + // the sum of the two unit vectors is the bisector + float zx = vec_unit_lep1_x + vec_unit_lep2_x; + float zy = vec_unit_lep1_y + vec_unit_lep2_y; + + // normalize with magnitude again? + float modz = sqrt(zx * zx + zy * zy); + zx = zx / modz; + zy = zy / modz; + + // build the projection of MET onto this bisector + float pzeta_miss = zx * tlv_MET.Pt() * cos(tlv_MET.Phi()) + + zy * tlv_MET.Pt() * sin(tlv_MET.Phi()); + + out_vector.push_back(pzeta_miss); + + // std::cout << "pzeta_miss = " << pzeta_miss << std::endl; + + return out_vector; +} + +// combine the two with a factor applied: CMS tautau uses 0.85 +ROOT::VecOps::RVec +AnalysisFCChh::get_dzeta(ROOT::VecOps::RVec pzeta_miss, + ROOT::VecOps::RVec pzeta_vis, float factor) { + + ROOT::VecOps::RVec out_vector; + + // there should be one pzeta each + if (pzeta_miss.size() < 1 || pzeta_vis.size() < 1) { + return out_vector; + } + + out_vector.push_back(pzeta_miss.at(0) - factor * pzeta_vis.at(0)); + + // std::cout << "dzeta = " << pzeta_miss.at(0) - factor*pzeta_vis.at(0) << + // std::endl; + + return out_vector; +} + +// combine MET with the Zll pair into the HZZ candidate +ROOT::VecOps::RVec AnalysisFCChh::build_HZZ( + ROOT::VecOps::RVec Z_ll_pair, + ROOT::VecOps::RVec MET_obj) { + + ROOT::VecOps::RVec out_vector; + + // if one of the input particles is empty, return an empty vector and a + // warning + if (Z_ll_pair.size() < 1 || MET_obj.size() < 1) { + // std::cout << "Warning in AnalysisFCChh::build_HZZ - one input vector is + // empty, returning an empty vector." << std::endl; + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_reco(Z_ll_pair.at(0)); + TLorentzVector tlv_2 = getTLV_MET(MET_obj.at(0)); + + TLorentzVector tlv_merged = tlv_1 + tlv_2; + + edm4hep::ReconstructedParticleData particle_merged; + particle_merged.momentum.x = tlv_merged.Px(); + particle_merged.momentum.y = tlv_merged.Py(); + particle_merged.momentum.z = tlv_merged.Pz(); + particle_merged.mass = tlv_merged.M(); + + out_vector.push_back(particle_merged); + + return out_vector; +} + +// get dR between two objects: +ROOT::VecOps::RVec AnalysisFCChh::get_angularDist( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec particle_2, + TString type) { + + ROOT::VecOps::RVec out_vector; + + // if one of the input particles is empty, fill default value + if (particle_1.size() < 1 || particle_2.size() < 1) { + out_vector.push_back(-999.); + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_reco(particle_1.at(0)); + TLorentzVector tlv_2 = getTLV_reco(particle_2.at(0)); + + if (type.Contains("dR")) { + out_vector.push_back(tlv_1.DeltaR(tlv_2)); + } + + else if (type.Contains("dEta")) { + out_vector.push_back(abs(tlv_1.Eta() - tlv_2.Eta())); + } + + else if (type.Contains("dPhi")) { + out_vector.push_back(tlv_1.DeltaPhi(tlv_2)); + } + + else { + std::cout + << " Error in AnalysisFCChh::get_angularDist - requested unknown type " + << type << "Returning default of -999." << std::endl; + out_vector.push_back(-999.); + } + + return out_vector; +} + +// get angular distances between MET and an object: +ROOT::VecOps::RVec AnalysisFCChh::get_angularDist_MET( + ROOT::VecOps::RVec particle_1, + ROOT::VecOps::RVec MET_obj, + TString type) { + + ROOT::VecOps::RVec out_vector; + + // if one of the input particles is empty, fill default value + if (particle_1.size() < 1 || MET_obj.size() < 1) { + out_vector.push_back(-999.); + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_reco(particle_1.at(0)); + TLorentzVector tlv_2 = getTLV_MET(MET_obj.at(0)); + + if (type.Contains("dR")) { + out_vector.push_back(tlv_1.DeltaR(tlv_2)); + } + + else if (type.Contains("dEta")) { + out_vector.push_back(abs(tlv_1.Eta() - tlv_2.Eta())); + } + + else if (type.Contains("dPhi")) { + out_vector.push_back(tlv_1.DeltaPhi(tlv_2)); + } + + else { + std::cout + << " Error in AnalysisFCChh::get_angularDist - requested unknown type " + << type << "Returning default of -999." << std::endl; + out_vector.push_back(-999.); + } + + return out_vector; +} + +// get angular distances between the two particles in a pair: +ROOT::VecOps::RVec +AnalysisFCChh::get_angularDist_pair(ROOT::VecOps::RVec pairs, + TString type) { + + ROOT::VecOps::RVec out_vector; + + // if input pairs is empty, fill default value + if (pairs.size() < 1) { + out_vector.push_back(-999.); + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_reco(pairs.at(0).particle_1); + TLorentzVector tlv_2 = getTLV_reco(pairs.at(0).particle_2); + + if (type.Contains("dR")) { + out_vector.push_back(tlv_1.DeltaR(tlv_2)); + } + + else if (type.Contains("dEta")) { + out_vector.push_back(abs(tlv_1.Eta() - tlv_2.Eta())); + } + + else if (type.Contains("dPhi")) { + out_vector.push_back(tlv_1.DeltaPhi(tlv_2)); + } + + else { + std::cout + << " Error in AnalysisFCChh::get_angularDist - requested unknown type " + << type << "Returning default of -999." << std::endl; + out_vector.push_back(-999.); + } + + return out_vector; +} + +// get angular distances between the two particles in a pair: MC particles +ROOT::VecOps::RVec +AnalysisFCChh::get_angularDist_pair(ROOT::VecOps::RVec pairs, + TString type) { + + ROOT::VecOps::RVec out_vector; + + // if input pairs is empty, fill default value + if (pairs.size() < 1) { + out_vector.push_back(-999.); + return out_vector; + } + + // else, for now, just take the first of each, should be the "best" one (by + // user input) - flexibility to use all combinations is there, to be + // implemented if needed + TLorentzVector tlv_1 = getTLV_MC(pairs.at(0).particle_1); + TLorentzVector tlv_2 = getTLV_MC(pairs.at(0).particle_2); + + if (type.Contains("dR")) { + out_vector.push_back(tlv_1.DeltaR(tlv_2)); + } + + else if (type.Contains("dEta")) { + out_vector.push_back(abs(tlv_1.Eta() - tlv_2.Eta())); + } + + else if (type.Contains("dPhi")) { + out_vector.push_back(tlv_1.DeltaPhi(tlv_2)); + } + + else { + std::cout + << " Error in AnalysisFCChh::get_angularDist - requested unknown type " + << type << "Returning default of -999." << std::endl; + out_vector.push_back(-999.); + } + + return out_vector; +} + +// function which returns the immediate children of a truth particle +ROOT::VecOps::RVec +AnalysisFCChh::get_immediate_children( + edm4hep::MCParticleData truth_part, + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids) { + + ROOT::VecOps::RVec child_list; + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + auto children_size = last_child_index - first_child_index; + + // std::cout << "children size: " << children_size << std::endl; + + for (int child_i = 0; child_i < children_size; child_i++) { + auto child_i_index = daughter_ids.at(first_child_index + child_i).index; + auto child = truth_particles.at(child_i_index); + // std::cout << "PDG ID of child number " << child_i << " : " << child.PDG + // << std::endl; + child_list.push_back(child); + } + + return child_list; +} + +// function which finds truth higgs in the MC particles and selects the one that +// decays according to requested type (to ZZ or bb here) +ROOT::VecOps::RVec AnalysisFCChh::get_truth_Higgs( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, TString decay) { + ROOT::VecOps::RVec higgs_list; + + // std::cout << "looking for higgs .." << std::endl; + + for (auto &truth_part : truth_particles) { + if (isH(truth_part)) { + // check into which particles the Higgs decays: + + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + + // std::cout << "Found higgs with children with indices " << + // first_child_index << " , " << last_child_index << std::endl; std::cout + // << "number of higgs daughters:" << last_child_index - first_child_index + // << std::endl; + + // std::cout << "number of higgs daughters:" << last_child_index - + // first_child_index << std::endl; + + // skip the intermediate higgs that only lead to one other particle + if (last_child_index - first_child_index != 2) { + // std::cout << "Error in get_truth_Higgs! Found more or fewer than + // exactly 2 daughters of a higgs boson - this is not expected by code. + // Need to implement a solution still!"<< std::endl; return higgs_list; + continue; + } + + // now get the indices in the daughters vector + auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // then go back to the original vector of MCParticles + auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + // std::cout << "PDG ID of first child: " << child_1.PDG << std::endl; + // std::cout << "PDG ID of second child: " << child_2.PDG << std::endl; + + // std::cout << "Found higgs with status = " << h_status << " and children + // with indices " << child_1.PDG << " , " << child_2.PDG << std::endl; + if (decay.Contains("ZZ") && isZ(child_1) && isZ(child_2)) { + higgs_list.push_back(truth_part); + } + + else if (decay.Contains("bb") && isb(child_1) && isb(child_2)) { + higgs_list.push_back(truth_part); + } + + else if ((decay.Contains("yy") || decay.Contains("aa")) && + isPhoton(child_1) && isPhoton(child_2)) { + higgs_list.push_back(truth_part); + } + } + } + + return higgs_list; +} + +// Same for getting a truth Z (->bb) +ROOT::VecOps::RVec AnalysisFCChh::get_truth_Z_decay( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, TString decay) { + ROOT::VecOps::RVec Z_list; + + // std::cout << "looking for higgs .." << std::endl; + + for (auto &truth_part : truth_particles) { + if (isZ(truth_part)) { + // check into which particles the Higgs decays: + + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + + // std::cout << "Found Z with children with indices " << first_child_index + // << " , " << last_child_index << std::endl; std::cout << "number of Z + // daughters:" << last_child_index - first_child_index << std::endl; + + // skip the intermediate Z that only lead to another Z + if (last_child_index - first_child_index != 2) { + continue; + } + + // now get the indices in the daughters vector + auto child_1_MC_index = daughter_ids.at(first_child_index).index; + auto child_2_MC_index = daughter_ids.at(last_child_index - 1).index; + + // then go back to the original vector of MCParticles + auto child_1 = truth_particles.at(child_1_MC_index); + auto child_2 = truth_particles.at(child_2_MC_index); + + // std::cout << "PDG ID of first child: " << child_1.PDG << std::endl; + // std::cout << "PDG ID of second child: " << child_2.PDG << std::endl; + + if (decay.Contains("bb") && isb(child_1) && isb(child_2)) { + Z_list.push_back(truth_part); + } + } + } + + return Z_list; +} + +// get the truth flavour of the leptons from taus +ROOT::VecOps::RVec AnalysisFCChh::getTruthLepLepFlavour( + ROOT::VecOps::RVec leps_from_tau) { + + ROOT::VecOps::RVec results_vec; + + if (leps_from_tau.size() != 2) { + std::cout + << "Error - running getTruthLepLepFlavour on event which doesn't have " + "exactly two leptons from taus. This isnt the intended usage." + << std::endl; + return results_vec; + } + + auto pdg_1 = leps_from_tau.at(0).PDG; + auto pdg_2 = leps_from_tau.at(1).PDG; + + if (abs(pdg_1) == 11 && abs(pdg_2) == 11) { + results_vec.push_back(0); + } + + else if (abs(pdg_1) == 13 && abs(pdg_2) == 13) { + results_vec.push_back(1); + } + + else if ((abs(pdg_1) == 11 && abs(pdg_2) == 13) || + (abs(pdg_1) == 13 && abs(pdg_2) == 11)) { + results_vec.push_back(2); + } + + // option for taus, needed for checking bbWW + else if (abs(pdg_1) == 15 || abs(pdg_2) == 15) { + results_vec.push_back(3); + } + + else { + std::cout << "Error - found leptons from taus that are neither electrons " + "nor muons" + << std::endl; + results_vec.push_back(-999); + } + + return results_vec; +} + +// take the vector with truth leptons from taus and pick out the electron or +// muon +ROOT::VecOps::RVec AnalysisFCChh::getTruthEle( + ROOT::VecOps::RVec leps_from_tau) { + + ROOT::VecOps::RVec results_vec; + + for (auto &truth_lep : leps_from_tau) { + + // std::cout << "PDG ID " << abs(truth_lep.PDG) << std::endl; + + // electrons + if (abs(truth_lep.PDG) == 11) { + results_vec.push_back(truth_lep); + } + } + return results_vec; +} + +ROOT::VecOps::RVec AnalysisFCChh::getTruthMu( + ROOT::VecOps::RVec leps_from_tau) { + + ROOT::VecOps::RVec results_vec; + + for (auto &truth_lep : leps_from_tau) { + + // muons + if (abs(truth_lep.PDG) == 13) { + results_vec.push_back(truth_lep); + } + } + return results_vec; +} + +// find the light leptons (e or mu) that originate from a tau decay (which comes +// from a higgs, and not a b-meson) using the truth info -> to use as filter for +// emu tautau evts +ROOT::VecOps::RVec AnalysisFCChh::getLepsFromTau( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + // test by simply counting first: + // int counter = 0; + ROOT::VecOps::RVec leps_list; + + // loop over all truth particles and find light leptons from taus that came + // from higgs (the direction tau->light lepton as child appears to be missing + // in the tautau samples) + for (auto &truth_part : truth_particles) { + if (isLightLep(truth_part)) { + bool from_tau_higgs = + isChildOfTauFromHiggs(truth_part, parent_ids, truth_particles); + if (from_tau_higgs) { + // counter+=1; + leps_list.push_back(truth_part); + } + } + } + // std::cout << "Leps from tau-higgs " << counter << std::endl; + return leps_list; +} + +ROOT::VecOps::RVec AnalysisFCChh::getTruthTau( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type) { + + ROOT::VecOps::RVec tau_list; + for (auto &truth_part : truth_particles) { + bool flagchildren = false; + if (isTau(truth_part)) { + + // check also if from Higgs to count only from Higgs ones + if (type.Contains("from_higgs") && + !isFromHiggsDirect( + truth_part, parent_ids, + truth_particles)) { //&& isFromHadron(truth_part, parent_ids, + // truth_particles) ) { + continue; + } + // select both from higgs or from hadrons + if (type.Contains("higgshad") && + !isFromHiggsDirect(truth_part, parent_ids, truth_particles) && + !isFromHadron(truth_part, parent_ids, truth_particles)) { + // continue; + std::cout << " found tau neither from Higgs or from Had" << std::endl; + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + auto parent_MC_index = parent_ids.at(parent_i).index; + auto parent = truth_particles.at(parent_MC_index); + std::cout << "Parent PDG:" << std::endl; + std::cout << parent.PDG << std::endl; + } + continue; + } + + tau_list.push_back(truth_part); + } + } + //} + + return tau_list; +} + +ROOT::VecOps::RVec AnalysisFCChh::getTruthTauLeps( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type) { + + ROOT::VecOps::RVec tau_list; + bool flagchildren = false; + for (auto &truth_part : truth_particles) { + flagchildren = false; + if (isTau(truth_part)) { + + // check also if from Higgs to count only from Higgs ones + if (type.Contains("from_higgs") && + !isFromHiggsDirect( + truth_part, parent_ids, + truth_particles)) { //&& isFromHadron(truth_part, parent_ids, + // truth_particles) ) { + continue; + } + // select both from higgs or from hadrons + if (type.Contains("higgshad") && + !isFromHiggsDirect(truth_part, parent_ids, truth_particles) && + !isFromHadron(truth_part, parent_ids, truth_particles)) { + // continue; + std::cout << " found tau neither from Higgs or from Had" << std::endl; + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + auto parent_MC_index = parent_ids.at(parent_i).index; + auto parent = truth_particles.at(parent_MC_index); + std::cout << "Parent PDG:" << std::endl; + std::cout << parent.PDG << std::endl; + } + continue; + } + bool isItself = true; + while (isItself) { + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + // auto child_1_MC_index = daughter_ids.at(first_child_index).index; + // auto hild_2_MC_index = daughter_ids.at(last_child_index-1).index; + for (int child_i = first_child_index; child_i < last_child_index; + child_i++) { + auto child = truth_particles.at(daughter_ids.at(child_i).index); + if (abs(child.PDG) == 15) { + isItself = true; + truth_part = child; + break; + } else { + isItself = false; + } + } + } + + // std::cout << " found tau with children" << std::endl; + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + for (int ch_i = first_child_index; ch_i < last_child_index; ch_i++) { + auto ch = truth_particles.at(daughter_ids.at(ch_i).index); + std::cout << "Child ID: " << ch.PDG << std::endl; + if (isLep(ch)) { + std::cout << " Is leptonic" << std::endl; + flagchildren = true; + break; + } + } + if (flagchildren) { + tau_list.push_back(truth_part); + } + } + } + + return tau_list; +} + +// find truth (hadronic) taus, to check the tau veto +ROOT::VecOps::RVec AnalysisFCChh::getTruthTauHads( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec daughter_ids, + ROOT::VecOps::RVec parent_ids, TString type) { + + ROOT::VecOps::RVec tau_list; + bool flagchildren = false; + for (auto &truth_part : truth_particles) { + flagchildren = false; + if (isTau(truth_part)) { + + // check also if from Higgs to count only from Higgs ones + if (type.Contains("from_higgs") && + !isFromHiggsDirect( + truth_part, parent_ids, + truth_particles)) { //&& isFromHadron(truth_part, parent_ids, + // truth_particles) ) { + continue; + } + // select both from higgs or from hadrons + if (type.Contains("higgshad") && + !isFromHiggsDirect(truth_part, parent_ids, truth_particles) && + !isFromHadron(truth_part, parent_ids, truth_particles)) { + // continue; + std::cout << " found tau neither from Higgs or from Had" << std::endl; + auto first_parent_index = truth_part.parents_begin; + auto last_parent_index = truth_part.parents_end; + for (int parent_i = first_parent_index; parent_i < last_parent_index; + parent_i++) { + auto parent_MC_index = parent_ids.at(parent_i).index; + auto parent = truth_particles.at(parent_MC_index); + std::cout << "Parent PDG:" << std::endl; + std::cout << parent.PDG << std::endl; + } + continue; + } + bool isItself = true; + while (isItself) { + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + // auto child_1_MC_index = daughter_ids.at(first_child_index).index; + // auto hild_2_MC_index = daughter_ids.at(last_child_index-1).index; + for (int child_i = first_child_index; child_i < last_child_index; + child_i++) { + auto child = truth_particles.at(daughter_ids.at(child_i).index); + if (abs(child.PDG) == 15) { + isItself = true; + truth_part = child; + break; + } else { + isItself = false; + } + } + } + + // std::cout << " found tau with children" << std::endl; + auto first_child_index = truth_part.daughters_begin; + auto last_child_index = truth_part.daughters_end; + for (int ch_i = first_child_index; ch_i < last_child_index; ch_i++) { + auto ch = truth_particles.at(daughter_ids.at(ch_i).index); + std::cout << "Child ID: " << ch.PDG << std::endl; + if (isHadron(ch)) { + std::cout << " Is hadronic" << std::endl; + flagchildren = true; + break; + } + } + if (flagchildren) { + tau_list.push_back(truth_part); + } + } + } + + return tau_list; +} + +// find leptons (including taus?) that came from a H->WW decay +ROOT::VecOps::RVec AnalysisFCChh::getLepsFromW( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + // test by simply counting first: + // int counter = 0; + ROOT::VecOps::RVec leps_list; + + // loop over all truth particles and find light leptons from taus that came + // from higgs (the direction tau->light lepton as child appears to be missing + // in the tautau samples) + for (auto &truth_part : truth_particles) { + if (isLep(truth_part)) { // switch to isLightLep for tau veto! + bool from_W_higgs = + isChildOfWFromHiggs(truth_part, parent_ids, truth_particles); + if (from_W_higgs) { + // counter+=1; + leps_list.push_back(truth_part); + } + } + } + // std::cout << "Leps from tau-higgs " << counter << std::endl; + return leps_list; +} + +// find leptons (including taus?) that came from a H->ZZ decay +ROOT::VecOps::RVec AnalysisFCChh::getLepsFromZ( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + // test by simply counting first: + // int counter = 0; + ROOT::VecOps::RVec leps_list; + + // loop over all truth particles and find light leptons from taus that came + // from higgs (the direction tau->light lepton as child appears to be missing + // in the tautau samples) + for (auto &truth_part : truth_particles) { + if (isLep(truth_part)) { // switch to isLightLep for tau veto! + bool from_Z_higgs = + isChildOfZFromHiggs(truth_part, parent_ids, truth_particles); + if (from_Z_higgs) { + // counter+=1; + leps_list.push_back(truth_part); + } + } + } + // std::cout << "Leps from tau-higgs " << counter << std::endl; + return leps_list; +} + +// find photons that came from a H->yy decay +ROOT::VecOps::RVec AnalysisFCChh::getPhotonsFromH( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + ROOT::VecOps::RVec gamma_list; + + // loop over all truth particles and find stable photons that do not come + // (directly) from a hadron decay + for (auto &truth_part : truth_particles) { + if (isStablePhoton(truth_part)) { + bool from_higgs = hasHiggsParent(truth_part, parent_ids, truth_particles); + if (isFromHadron(truth_part, parent_ids, truth_particles)) { + from_higgs = false; + } + if (from_higgs) { + gamma_list.push_back(truth_part); + } + } + } + // std::cout << "Leps from tau-higgs " << counter << std::endl; + return gamma_list; +} + +// momentum fraction x for tau decays +ROOT::VecOps::RVec AnalysisFCChh::get_x_fraction( + ROOT::VecOps::RVec visible_particle, + ROOT::VecOps::RVec MET) { + ROOT::VecOps::RVec results_vec; + + if (visible_particle.size() < 1 || MET.size() < 1) { + results_vec.push_back(-999.); + return results_vec; + } + + // get the components of the calculation + TLorentzVector met_tlv = getTLV_reco(MET.at(0)); + // TLorentzVector met_tlv = getTLV_MET(MET.at(0)); + TLorentzVector vis_tlv = getTLV_reco(visible_particle.at(0)); + + // float x_fraction = vis_tlv.Pt()/(vis_tlv.Pt()+met_tlv.Pt()); // try scalar + // sum + float x_fraction = + vis_tlv.Pt() / (vis_tlv + met_tlv).Pt(); // vector sum makes more sense? + + // std::cout << " Debug m_col: pT_vis : " << vis_tlv.Pt() << std::endl; + // std::cout << " Debug m_col: pT_miss : " << met_tlv.Pt() << std::endl; + // std::cout << " Debug m_col: x with scalar sum: " << x_fraction << + // std::endl; std::cout << " Debug m_col: x with vector sum: " << + // vis_tlv.Pt()/(vis_tlv+met_tlv).Pt() << std::endl; + + results_vec.push_back(x_fraction); + return results_vec; +} + +// with truth variables +ROOT::VecOps::RVec AnalysisFCChh::get_x_fraction_truth( + ROOT::VecOps::RVec visible_particle, + ROOT::VecOps::RVec MET) { + ROOT::VecOps::RVec results_vec; + + if (visible_particle.size() < 1 || MET.size() < 1) { + results_vec.push_back(-999.); + return results_vec; + } + + // get the components of the calculation + TLorentzVector met_tlv = getTLV_reco(MET.at(0)); + // TLorentzVector met_tlv = getTLV_MET(MET.at(0)); + TLorentzVector vis_tlv = getTLV_MC(visible_particle.at(0)); + + // float x_fraction = vis_tlv.Pt()/(vis_tlv.Pt()+met_tlv.Pt()); // try scalar + // sum + float x_fraction = + vis_tlv.Pt() / (vis_tlv + met_tlv).Pt(); // vector sum makes more sense? + + // std::cout << " Debug m_col: pT_vis truth : " << vis_tlv.Pt() << std::endl; + // std::cout << " Debug m_col: pT_miss truth : " << met_tlv.Pt() << std::endl; + // std::cout << " Debug m_col: x truth with vector sum: " << x_fraction << + // std::endl; std::cout << " Debug m_col: x with vector sum: " << + // vis_tlv.Pt()/(vis_tlv+met_tlv).Pt() << std::endl; + + results_vec.push_back(x_fraction); + return results_vec; +} + +ROOT::VecOps::RVec AnalysisFCChh::get_mtautau_col( + ROOT::VecOps::RVec ll_pair_merged, + ROOT::VecOps::RVec x1, ROOT::VecOps::RVec x2) { + ROOT::VecOps::RVec results_vec; + + // here there is no result if any of the arguments is actually not filled + // (e.g. no selected pair) OR if one of the xs is 0 or negative (shouldnt + // happen but better be on the safe side?) + if (ll_pair_merged.size() < 1 || x1.size() < 1 || x2.size() < 1 || + x1.at(0) <= 0. || x2.at(0) <= 0.) { + results_vec.push_back(-999.); + return results_vec; + } + + float mtautau_vis = getTLV_reco(ll_pair_merged.at(0)) + .M(); // check against manual calculation?? + float mtautau_col = mtautau_vis / (sqrt(x1.at(0) * x2.at(0))); + + // std::cout << " Debug m_col: mtautau_vis mass = " << mtautau_vis << + // std::endl; std::cout << " Debug m_col: m_col mass = " << mtautau_col << + // std::endl; + + results_vec.push_back(mtautau_col); + return results_vec; +} + +// merge the invariant bb mass and the tautau colinear mass for the +// bbtautau(emu) analysis +ROOT::VecOps::RVec AnalysisFCChh::get_mbbtautau_col( + ROOT::VecOps::RVec bb_pair_merged, + ROOT::VecOps::RVec mtautau_col) { + ROOT::VecOps::RVec results_vec; + + // here there is no result if any of the arguments is actually not filled or + // if the mtautau col is at default value -999. + if (bb_pair_merged.size() < 1 || mtautau_col.size() < 1 || + mtautau_col.at(0) <= 0.) { + results_vec.push_back(-999.); + return results_vec; + } + + float mbb = getTLV_reco(bb_pair_merged.at(0)).M(); + + results_vec.push_back(mbb + mtautau_col.at(0)); + return results_vec; +} + +// truth matching: find a reco part that matches the truth part within cone of +// dR_thres +ROOT::VecOps::RVec +AnalysisFCChh::find_reco_matched( + ROOT::VecOps::RVec truth_parts_to_match, + ROOT::VecOps::RVec reco_parts_all, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no iput part, return nothing (didnt input correct event then) + if (truth_parts_to_match.size() < 1) { + return out_vector; + } + + // currently only want one particle to match, to not get confused with + // vectors: + if (truth_parts_to_match.size() != 1) { + std::cout << "Error! Found more than one truth part in input to " + "find_reco_matched() ! Not intended?" + << std::endl; + // return out_vector; + } + + // take the TLV of that particle we want to match: + TLorentzVector truth_part_tlv = getTLV_MC(truth_parts_to_match.at(0)); + + // loop over all reco parts and find if there is one within the dr threshold + // to the truth part + for (auto &check_reco_part : reco_parts_all) { + TLorentzVector check_reco_part_tlv = getTLV_reco(check_reco_part); + float dR_val = truth_part_tlv.DeltaR(check_reco_part_tlv); + + if (dR_val <= dR_thres) { + out_vector.push_back(check_reco_part); + } + + check_reco_part_tlv.Clear(); + } + + return out_vector; +} + +// manual implementation of the delphes isolation criterion +ROOT::VecOps::RVec AnalysisFCChh::get_IP_delphes( + ROOT::VecOps::RVec test_parts, + ROOT::VecOps::RVec reco_parts_all, + float dR_min, float pT_min, bool exclude_light_leps) { + + ROOT::VecOps::RVec out_vector; + + if (test_parts.size() < 1) { + return out_vector; + } + + for (auto &test_part : test_parts) { + // first get the pT of the test particle: + TLorentzVector tlv_test_part = getTLV_reco(test_part); + float pT_test_part = tlv_test_part.Pt(); + + float sum_pT = 0; + + // loop over all other parts and sum up pTs if they are within the dR cone + // and above min pT + for (auto &reco_part : reco_parts_all) { + + // check type of particle first: + // std::cout << "PDG ID of particle:" << reco_part.m_particleIDUsed << + // std::endl; + + // exclude electrons and muons + + TLorentzVector tlv_reco_part = getTLV_reco(reco_part); + + if (tlv_reco_part.Pt() > pT_min && + tlv_test_part.DeltaR(tlv_reco_part) < dR_min) { + sum_pT += tlv_reco_part.Pt(); + } + + tlv_reco_part.Clear(); + } + + float IP_val = sum_pT / pT_test_part; + + out_vector.push_back(IP_val); + } + + return out_vector; +} + +// index navigation matching reco and MC particle to get pdg id of reco +// particle, following the ReconstructedParticle2MC::getRP2MC_pdg fuction in +// base FW +ROOT::VecOps::RVec +AnalysisFCChh::filter_lightLeps( + ROOT::VecOps::RVec recind, ROOT::VecOps::RVec mcind, + ROOT::VecOps::RVec reco, + ROOT::VecOps::RVec mc) { + + ROOT::VecOps::RVec out_vector; + + for (auto &reco_index : recind) { + + std::cout << "Reco index:" << reco_index << std::endl; + std::cout << "MC index:" << mcind.at(reco_index) << std::endl; + + // testing: + auto pdg_id = mc.at(mcind.at(reco_index)).PDG; + float mass = reco.at(reco_index).mass; + + std::cout << "MC PDG ID:" << pdg_id << std::endl; + std::cout << "Reco mass:" << mass << std::endl; + } + + return out_vector; +} + +// muon mass: 0.105658 +// electron mass: 0.000510999 ? + +// find the neutrinos that originate from a W-decay (which comes from a higgs, +// and not a b-meson) using the truth info -> to use for truth MET +ROOT::VecOps::RVec AnalysisFCChh::getNusFromW( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids) { + + ROOT::VecOps::RVec nus_list; + + // loop over all truth particles and find neutrinos from Ws that came from + // higgs (the direction tau->light lepton as child appears to be missing in + // the tautau samples) + for (auto &truth_part : truth_particles) { + if (isNeutrino(truth_part)) { + bool from_W_higgs = + isChildOfWFromHiggs(truth_part, parent_ids, truth_particles); + if (from_W_higgs) { + // counter+=1; + nus_list.push_back(truth_part); + } + } + } + // std::cout << "Leps from tau-higgs " << counter << std::endl; + return nus_list; +} + +// get truth met -> return as recoparticle so can use instead of reco met in +// other ftcs for some checks +ROOT::VecOps::RVec +AnalysisFCChh::getTruthMETObj( + ROOT::VecOps::RVec truth_particles, + ROOT::VecOps::RVec parent_ids, TString type) { + + ROOT::VecOps::RVec out_vector; + + ROOT::VecOps::RVec selected_nus; + + for (auto &truth_part : truth_particles) { + if (isNeutrino(truth_part)) { + + // sum only the neutrinos from different higgs decays if requested + if (type.Contains("hww_only") && + isChildOfWFromHiggs(truth_part, parent_ids, truth_particles)) { + selected_nus.push_back(truth_part); + } + + else if (type.Contains("htautau_only") && + isChildOfTauFromHiggs(truth_part, parent_ids, truth_particles)) { + // std::cout << "getting truth MET from taus only" << std::endl; + selected_nus.push_back(truth_part); + } + + else if (type.Contains("hzz_only") && + isChildOfZFromHiggs(truth_part, parent_ids, truth_particles)) { + selected_nus.push_back(truth_part); + } + + else if (type.Contains("all_nu")) { + selected_nus.push_back(truth_part); + } + } + } + + // sum up + TLorentzVector tlv_total; + + for (auto &nu : selected_nus) { + TLorentzVector tlv_nu = getTLV_MC(nu); + tlv_total += tlv_nu; + } + + edm4hep::ReconstructedParticleData met_obj; + met_obj.momentum.x = tlv_total.Px(); + met_obj.momentum.y = tlv_total.Py(); + met_obj.momentum.z = 0.; + met_obj.mass = 0.; + + // std::cout << "Truth MET from building object: " << + // sqrt(met_obj.momentum.x*met_obj.momentum.x + + // met_obj.momentum.y*met_obj.momentum.y) << std::endl; + + out_vector.push_back(met_obj); + + return out_vector; +} + +// additonal code for validation of new delphes card: + +// helper function to find dR matched mc particle for a single reco particle - +// returns vector of size 1 always, only the one that is closest in dR! +// (technically doesnt need to be vector at this stage ..) +ROOT::VecOps::RVec +AnalysisFCChh::find_mc_matched_particle( + edm4hep::ReconstructedParticleData reco_part_to_match, + ROOT::VecOps::RVec check_mc_parts, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + TLorentzVector reco_part_tlv = getTLV_reco(reco_part_to_match); + + for (auto &check_mc_part : check_mc_parts) { + TLorentzVector check_mc_part_tlv = getTLV_MC(check_mc_part); + + float dR_val = reco_part_tlv.DeltaR(check_mc_part_tlv); + + if (dR_val <= dR_thres) { + + // check if already sth in the vector - always want only exactly one + // match! + + if (out_vector.size() > 0) { + // check which one is closer + float dR_val_old = reco_part_tlv.DeltaR(getTLV_MC(out_vector.at(0))); + + float pT_diff_old = + abs(reco_part_tlv.Pt() - getTLV_MC(out_vector.at(0)).Pt()); + + if (dR_val < dR_val_old) { + out_vector.at(0) = check_mc_part; + + if (pT_diff_old < abs(reco_part_tlv.Pt() - check_mc_part_tlv.Pt())) { + std::cout << "Found case where closest in pT is not closest in dR" + << std::endl; + } + } + } + + else { + out_vector.push_back(check_mc_part); + } + } + + check_mc_part_tlv.Clear(); + } + + return out_vector; +} + +// helper function to find dR matched reco particle for a single truth particle +// - returns vector of size 1 always, only the one that is closest in dR! +// (technically doesnt need to be vector at this stage ..) +ROOT::VecOps::RVec +AnalysisFCChh::find_reco_matched_particle( + edm4hep::MCParticleData truth_part_to_match, + ROOT::VecOps::RVec check_reco_parts, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + TLorentzVector truth_part_tlv = getTLV_MC(truth_part_to_match); + + for (auto &check_reco_part : check_reco_parts) { + TLorentzVector check_reco_part_tlv = getTLV_reco(check_reco_part); + + float dR_val = truth_part_tlv.DeltaR(check_reco_part_tlv); + + if (dR_val <= dR_thres) { + + // check if already sth in the vector - always want only exactly one + // match! + + if (out_vector.size() > 0) { + // check which one is closer + float dR_val_old = truth_part_tlv.DeltaR(getTLV_reco(out_vector.at(0))); + + float pT_diff_old = + abs(truth_part_tlv.Pt() - getTLV_reco(out_vector.at(0)).Pt()); + + if (dR_val < dR_val_old) { + out_vector.at(0) = check_reco_part; + + if (pT_diff_old < + abs(truth_part_tlv.Pt() - check_reco_part_tlv.Pt())) { + std::cout << "Found case where closest in pT is not closest in dR" + << std::endl; + } + } + } + + else { + out_vector.push_back(check_reco_part); + } + } + + check_reco_part_tlv.Clear(); + } + + return out_vector; +} + +// same as above but returns the index of the matched particle instead +ROOT::VecOps::RVec AnalysisFCChh::find_reco_matched_index( + edm4hep::MCParticleData truth_part_to_match, + ROOT::VecOps::RVec check_reco_parts, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + TLorentzVector truth_part_tlv = getTLV_MC(truth_part_to_match); + + for (int i = 0; i < check_reco_parts.size(); ++i) { + + edm4hep::ReconstructedParticleData check_reco_part = check_reco_parts[i]; + + TLorentzVector check_reco_part_tlv = getTLV_reco(check_reco_part); + + float dR_val = truth_part_tlv.DeltaR(check_reco_part_tlv); + + if (dR_val <= dR_thres) { + + // check if already sth in the vector - always want only exactly one + // match! + + if (out_vector.size() > 0) { + // check which one is closer + edm4hep::ReconstructedParticleData match_old = + check_reco_parts[out_vector[0]]; // edit this to be TLV directly + + float dR_val_old = truth_part_tlv.DeltaR(getTLV_reco(match_old)); + + float pT_diff_old = + abs(truth_part_tlv.Pt() - getTLV_reco(match_old).Pt()); + + if (dR_val < dR_val_old) { + out_vector.at(0) = i; + + if (pT_diff_old < + abs(truth_part_tlv.Pt() - check_reco_part_tlv.Pt())) { + std::cout << "Found case where closest in pT is not closest in dR" + << std::endl; + } + } + } + + else { + out_vector.push_back(i); + } + } + + check_reco_part_tlv.Clear(); + } + + return out_vector; +} + +// truth -> reco matching for a vector of generic truth particles - this doesnt +// check if the type of particles are the same! -> make sure you give the +// correct collections! +ROOT::VecOps::RVec +AnalysisFCChh::find_reco_matches( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_parts.size() < 1) { + return out_vector; + } + + for (auto &truth_part : truth_parts) { + + ROOT::VecOps::RVec reco_match_vector = + find_reco_matched_particle(truth_part, reco_particles, dR_thres); + + if (reco_match_vector.size() > 1) { + std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth " + "particle matched to more than one reco particle." + << std::endl; + } + + // check that the reco particle is not already in the out_vector + bool isAlready = false; + for (auto &out_i : out_vector) { + if ((getTLV_reco(reco_match_vector[0]).Pt() == getTLV_reco(out_i).Pt()) && + (getTLV_reco(reco_match_vector[0]).Eta() == + getTLV_reco(out_i).Eta())) { + isAlready = true; + // std::cout<<"Already in the array"< +AnalysisFCChh::find_reco_matches_no_remove( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_parts.size() < 1) { + return out_vector; + } + + for (auto &truth_part : truth_parts) { + + ROOT::VecOps::RVec reco_match_vector = + find_reco_matched_particle(truth_part, reco_particles, dR_thres); + + if (reco_match_vector.size() > 1) { + std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth " + "particle matched to more than one reco particle." + << std::endl; + } + + out_vector.append(reco_match_vector.begin(), reco_match_vector.end()); + } + + return out_vector; +} + +// variation of the previous function, with the addition of a set of MC +// particles that must not match with the reco ones! +ROOT::VecOps::RVec +AnalysisFCChh::find_reco_matches_exclusive( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec truth_parts_exc, + ROOT::VecOps::RVec reco_particles, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_parts.size() < 1) { + return out_vector; + } + + if (truth_parts_exc.size() < 1) { + // std::cout<<"Use find_reco_matches!"< reco_match_vector = + find_reco_matched_particle(truth_part, reco_particles, dR_thres); + ROOT::VecOps::RVec mc_excluded_vector = + find_mc_matched_particle(reco_match_vector[0], truth_parts_exc, 1.0); + if (mc_excluded_vector.size() > 0) { + std::cout << "Excluded MC particle found matching with reco obj!" + << std::endl; + continue; + } + // if (reco_match_vector.size() > 1){ + // std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth + // particle matched to more than one reco particle." << std::endl; + // } + + // check that the reco particle is not already in the out_vector + bool isAlready = false; + for (auto &out_i : out_vector) { + if ((getTLV_reco(reco_match_vector[0]).Pt() == getTLV_reco(out_i).Pt()) && + (getTLV_reco(reco_match_vector[0]).Eta() == + getTLV_reco(out_i).Eta())) { + isAlready = true; + // std::cout<<"Already in the array"< AnalysisFCChh::find_truth_matches( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_parts.size() < 1) { + return out_vector; + } + + for (auto &truth_part : truth_parts) { + + ROOT::VecOps::RVec reco_match_vector = + find_reco_matched_particle(truth_part, reco_particles, dR_thres); + + if (reco_match_vector.size() > 1) { + std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth " + "particle matched to more than one reco particle." + << std::endl; + } + + if (reco_match_vector.size() == 1) { + out_vector.push_back(truth_part); + } + // out_vector.append(reco_match_vector.begin(), reco_match_vector.end()); + } + + return out_vector; +} + +// same as above just with indices +ROOT::VecOps::RVec AnalysisFCChh::find_reco_match_indices( + ROOT::VecOps::RVec truth_parts, + ROOT::VecOps::RVec reco_particles, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_parts.size() < 1) { + return out_vector; + } + + for (auto &truth_part : truth_parts) { + + ROOT::VecOps::RVec reco_match_vector = + find_reco_matched_index(truth_part, reco_particles, dR_thres); + + if (reco_match_vector.size() > 1) { + std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth " + "particle matched to more than one reco particle." + << std::endl; + } + + out_vector.append(reco_match_vector.begin(), reco_match_vector.end()); + } + + return out_vector; +} + +// truth matching: take as input the truth leptons from e.g. HWW decay and check +// if they have a reco match within dR cone - Note: skip taus! +ROOT::VecOps::RVec +AnalysisFCChh::find_true_signal_leps_reco_matches( + ROOT::VecOps::RVec truth_leps_to_match, + ROOT::VecOps::RVec reco_electrons, + ROOT::VecOps::RVec reco_muons, + float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_leps_to_match.size() < 1) { + return out_vector; + } + + // check the flavour of the input truth particle, if it is a tau we skip it + for (auto &truth_lep : truth_leps_to_match) { + + if (!isLightLep(truth_lep)) { + // std::cout << "Info: Problem in + // AnalysisFCChh::find_true_signal_leps_reco_matches() - Found truth tau + // (or other non light lepton) in attempt to match to reco, skipping." << + // std::endl; + continue; + } + + // TLorentzVector truth_part_tlv = getTLV_MC(truth_lep); + + // truth particle should thus be either an electron or a muon: + + // checking electrons: + if (abs(truth_lep.PDG) == 11) { + + ROOT::VecOps::RVec ele_match_vector = + find_reco_matched_particle(truth_lep, reco_electrons, dR_thres); + + // warning if match to more than one particle: + if (ele_match_vector.size() > 1) { + std::cout + << "Warning in AnalysisFCChh::find_true_signal_leps_reco_matches() " + "- Truth electron matched to more than one reco electron." + << std::endl; + std::cout << "Truth electron has pT = " << getTLV_MC(truth_lep).Pt() + << " charge = " << truth_lep.charge + << " pdg = " << truth_lep.PDG << std::endl; + // check the pTs of them: + for (auto &matched_ele : ele_match_vector) { + std::cout << "Matched electron with pT = " + << getTLV_reco(matched_ele).Pt() + << " charge = " << matched_ele.charge + << " dR distance to truth = " + << getTLV_MC(truth_lep).DeltaR(getTLV_reco(matched_ele)) + << std::endl; + } + } + + out_vector.append(ele_match_vector.begin(), ele_match_vector.end()); + + } + + // checking muons: + else if (abs(truth_lep.PDG) == 13) { + + ROOT::VecOps::RVec mu_match_vector = + find_reco_matched_particle(truth_lep, reco_muons, dR_thres); + + // warning if match to more than one particle: + if (mu_match_vector.size() > 1) { + std::cout + << "Warning in AnalysisFCChh::find_true_signal_leps_reco_matches() " + "- Truth muon matched to more than one reco muon." + << std::endl; + std::cout << "Truth muon has pT = " << getTLV_MC(truth_lep).Pt() + << " charge = " << truth_lep.charge + << " pdg = " << truth_lep.PDG << std::endl; + + // check the pTs of them: + for (auto &matched_mu : mu_match_vector) { + std::cout << "Matched muon with pT = " << getTLV_reco(matched_mu).Pt() + << " charge = " << matched_mu.charge + << "dR distance to truth = " + << getTLV_MC(truth_lep).DeltaR(getTLV_reco(matched_mu)) + << std::endl; + } + } + + out_vector.append(mu_match_vector.begin(), mu_match_vector.end()); + } + } + + return out_vector; +} + +// find the indices of reco matched particles, assume here that pdg id filtering +// of the input mc particles is already done and only the matching reco +// collection is passed +ROOT::VecOps::RVec AnalysisFCChh::find_truth_to_reco_matches_indices( + ROOT::VecOps::RVec truth_leps_to_match, + ROOT::VecOps::RVec reco_parts, + int pdg_ID, float dR_thres) { + + ROOT::VecOps::RVec out_vector; + + // if no input part, return nothing + if (truth_leps_to_match.size() < 1) { + return out_vector; + } + + // check the flavour of the input truth particle, if it is a tau we skip it + for (auto &truth_lep : truth_leps_to_match) { + + // std::cout << "Matching truth particle with pdgID: " << abs(truth_lep.PDG) + // << std::endl; + + // match only a certain requested type of particle + if (abs(truth_lep.PDG) != pdg_ID) { + // std::cout << "Skipping" << std::endl; + continue; + } + + // std::cout << "Running the match" << std::endl; + + ROOT::VecOps::RVec reco_match_indices_vector = + find_reco_matched_index(truth_lep, reco_parts, dR_thres); + + if (reco_match_indices_vector.size() > 1) { + std::cout + << "Warning in AnalysisFCChh::find_truth_to_reco_matches_indices() - " + "Truth particle matched to more than one reco particle." + << std::endl; + } + + out_vector.append(reco_match_indices_vector.begin(), + reco_match_indices_vector.end()); + } + + return out_vector; +} + +// try to get the isoVar when its filled as a usercollection +// ROOT::VecOps::RVec +// AnalysisFCChh::get_isoVar(ROOT::VecOps::RVec +// reco_parts_to_check, ROOT::VecOps::RVec +// all_reco_parts){ +// // first check the index of the particle we want to get the iso var for + +// ROOT::VecOps::RVec out_vector; + +// for (auto & reco_check_part : reco_parts_to_check){ +// std::cout << "index of the particle is:" << +// reco_check_part.index << std::endl; + +// } + +// return out_vector; + +// } diff --git a/examples/FCChh/HH_bbtautau/CMakeLists.txt b/examples/FCChh/HH_bbtautau/CMakeLists.txt deleted file mode 100644 index 42f534e81c..0000000000 --- a/examples/FCChh/HH_bbtautau/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_executable(analysis analysis.cc) -target_link_libraries(analysis ${FCCEDM_LIBRARIES} EDM4HEP::edm4hep ROOT::ROOTDataFrame ROOT::Physics FCCAnalyses) -target_include_directories(analysis PUBLIC ${FCCEDM_INCLUDE_DIRS} ${ROOT_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/analyzers/dataframe) diff --git a/examples/FCChh/HH_bbtautau/analysis.cc b/examples/FCChh/HH_bbtautau/analysis.cc deleted file mode 100644 index e1fb1feb24..0000000000 --- a/examples/FCChh/HH_bbtautau/analysis.cc +++ /dev/null @@ -1,82 +0,0 @@ - -#include -#include "TLorentzVector.h" -#include - -// FCC event datamodel includes -//#include "datamodel/ParticleData.h" -//#include "datamodel/LorentzVector.h" -//#include "datamodel/JetData.h" -//#include "datamodel/TaggedParticleData.h" -//#include "datamodel/TaggedJetData.h" -// Legacy class - -#include "ReconstructedParticle.h" -#include "ReconstructedParticle2MC.h" -#include "ReconstructedParticle2Track.h" -#include "MCParticle.h" - -auto _m = edm4hep::ReconstructedParticleData(); - -// Reproduce Heppy analysis -int main(int argc, char* argv[]){ - - - #ifdef ENABLEIMPLICITMT - ROOT::EnableImplicitMT(); - #endif - - // fcc edm libraries - //gSystem->Load("libdatamodel.so"); - gSystem->Load("libedm4hep"); - gSystem->Load("libpodio"); - gSystem->Load("libFCCAnalyses"); - // very basic command line argument parsing - if (argc < 3) { - std::cout << "error: need to specify fcc data files to analyze as command line arguments" << std::endl; - std::cout << "usage: analysis outfilename.root datafile1.root datafile2.root ... datafileN.root " << std::endl; - return 1; - } - std::cout << "Read files... "; - std::vector filenames; - - std::string outfilename = argv[1]; - for (int i = 2; i < argc; ++i) { - std::cout << " " << argv[i]; - filenames.push_back(argv[i]); - } - std::cout << std::endl; - - std::cout << "Creating TDataFrame ..." << std::endl; - ROOT::RDataFrame df("events", filenames); - - std::cout << "Apply selectors and define new branches ..." << std::endl; - auto selectors = df - .Define("MC_px", getMC_px,{"Particle"}) - .Define("MC_py", getMC_py,{"Particle"}) - .Define("MC_pz", getMC_pz,{"Particle"}) - .Define("MC_p", getMC_p,{"Particle"}) - .Define("RP_p", getRP_p,{"ReconstructedParticles"}) - //.Define("RPMC_p", getRP2MC_p,{"MCRecoAssociations#0", "MCRecoAssociations#1","ReconstructedParticles", "Particle"}) - .Define("RPMC_p", getRP2MC_p,{"MCRecoAssociations#0.index", "MCRecoAssociations#1.index","ReconstructedParticles", "Particle"}) - //.Define("RPMC_p2", getRP2MC_p_test2,{"MCRecoAssociations#0.index", "MCRecoAssociations#1.index","ReconstructedParticles", "Particle"}) - //.Define("RPMC_p3", getRP2MC_p_test3,{"MCRecoAssociations#0.index", "MCRecoAssociations#1.index"}) - ; - auto nentries = selectors.Count(); - std::cout << "Count events: " << *nentries << std::endl; - std::cout << "Writing snapshot to disk ... \t" << outfilename << std::endl; - selectors.Snapshot("events", outfilename, - { - "MC_px", - "MC_py", - "MC_pz", - "MC_p", - "RP_p", - //"RPMC_p", - "RPMC_p2", - "RPMC_p3" - } - ); - - return 0; -} diff --git a/examples/FCChh/HH_bbtautau/analysis.py b/examples/FCChh/HH_bbtautau/analysis.py deleted file mode 100644 index 1f8c88c5d8..0000000000 --- a/examples/FCChh/HH_bbtautau/analysis.py +++ /dev/null @@ -1,77 +0,0 @@ -import sys -import ROOT - -print ("Load cxx analyzers ... ",) -ROOT.gSystem.Load("libedm4hep") -ROOT.gSystem.Load("libpodio") -ROOT.gSystem.Load("libFCCAnalyses") -ROOT.gErrorIgnoreLevel = ROOT.kFatal -_edm = ROOT.edm4hep.ReconstructedParticleData() -_pod = ROOT.podio.ObjectID() -_fcc = ROOT.getMC_px - -print ('edm4hep ',_edm) -print ('podio ',_pod) -print ('fccana ',_fcc) - -class analysis(): - - #__________________________________________________________ - def __init__(self, inputlist, outname, ncpu): - self.outname = outname - if ".root" not in outname: - self.outname+=".root" - - ROOT.ROOT.EnableImplicitMT(ncpu) - - self.df = ROOT.RDataFrame("events", inputlist) - print (" done") - #__________________________________________________________ - def run(self): - - df2 = (self.df - .Define("selected_jets", "selRP_pT(50.)(Jet)") - .Define("jet_pT", "getRP_pt(Jet)") - .Define("seljet_pT", "getRP_pt(selected_jets)") - .Alias("Jet3","Jet#3.index") - .Define("JET_btag", "getJet_btag(Jet3, ParticleIDs, ParticleIDs_0)") - .Define("EVT_nbtag", "getJet_ntags(JET_btag)") - - ) - - # select branches for output file - branchList = ROOT.vector('string')() - for branchName in [ - "jet_pT", - "seljet_pT", - "EVT_nbtag" - - ]: - branchList.push_back(branchName) - df2.Snapshot("events", self.outname, branchList) - - - -# example call for standalone file -# python examples/FCChh/HH_bbtautau/analysis.py /eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v04/mgp8_pp_tt012j_5f/events_012599692.root - -if __name__ == "__main__": - - if len(sys.argv)==1: - print ("usage:") - print ("python ",sys.argv[0]," file.root") - sys.exit(3) - infile = sys.argv[1] - outDir = sys.argv[0].replace(sys.argv[0].split('/')[0],'outputs/FCChh').replace('analysis.py','')+'/' - import os - os.system("mkdir -p {}".format(outDir)) - outfile = outDir+infile.split('/')[-1] - ncpus = 2 - analysis = analysis(infile, outfile, ncpus) - analysis.run() - - tf = ROOT.TFile(infile) - entries = tf.events.GetEntries() - p = ROOT.TParameter(int)( "eventsProcessed", entries) - outf=ROOT.TFile(outfile,"UPDATE") - p.Write() diff --git a/examples/FCChh/HH_bbtautau/finalSel.py b/examples/FCChh/HH_bbtautau/finalSel.py deleted file mode 100644 index 5bf0200d00..0000000000 --- a/examples/FCChh/HH_bbtautau/finalSel.py +++ /dev/null @@ -1,47 +0,0 @@ -#python FCChhAnalyses/FCChh/ttHH/dataframe/finalSel.py -import sys, os -import ROOT - -###Input directory where the files produced at the pre-selection level are -baseDir = "FCChh/HH_bbtautau/" - -###Link to the dictonary that contains all the cross section informations etc... -procDict = 'FCC_procDict_fcc_v04.json' - -process_list=['pwp8_pp_hh_lambda100_5f_hhbbaa', - 'mgp8_pp_bbtata_QED', - 'mgp8_pp_bbtata_QCDQED', - 'mgp8_pp_tt012j_5f', - ] - -###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file -cut_list = {"sel0":"seljet_pT.size()>6", - } - -###Optinally Define new variables -define_list = {"seljet_pT_0":"seljet_pT.at(0)", - "seljet_pT_1":"seljet_pT.at(1)", - "seljet_pT_2":"seljet_pT.at(2)", - "seljet_pT_3":"seljet_pT.at(3)", - "seljet_pT_4":"seljet_pT.at(4)", - "seljet_pT_5":"seljet_pT.at(5)"} - -###Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. -variables = { - "jet_pt_0":{"name":"seljet_pT_0","title":"Leading jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_1":{"name":"seljet_pT_1","title":"Sub leading jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_2":{"name":"seljet_pT_2","title":"3rd jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_3":{"name":"seljet_pT_3","title":"4th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_4":{"name":"seljet_pT_4","title":"5th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_5":{"name":"seljet_pT_5","title":"6th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, -} - -###Number of CPUs to use -NUM_CPUS = 5 - -###This part is standard to all analyses -sys.path.append('./bin') -import runDataFrameFinal as rdf -#import bin.runDataFrameFinal as rdf -myana=rdf.runDataFrameFinal(baseDir,procDict,process_list,cut_list,variables,defines=define_list) -myana.run(ncpu=NUM_CPUS) diff --git a/examples/FCChh/HH_bbtautau/plots.py b/examples/FCChh/HH_bbtautau/plots.py deleted file mode 100644 index b5043a2f3c..0000000000 --- a/examples/FCChh/HH_bbtautau/plots.py +++ /dev/null @@ -1,40 +0,0 @@ -import ROOT - -# global parameters -intLumi = 30e+06 #in pb-1 -ana_tex = "HH bbtautau" -delphesVersion = "3.4.2" -energy = 100 -collider = "FCC-hh" -inputDir = "FCChh/HH_bbtautau/" -formats = ['png','pdf'] -yaxis = ['lin','log'] -stacksig = ['stack','nostack'] -outdir = 'FCChh/HH_bbtautau/plots/' - -variables = ['jet_pt_0','jet_pt_1','jet_pt_2','jet_pt_3','jet_pt_4','jet_pt_5'] - -###Dictonnary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection -selections = {} -selections['HH_bbtautau'] = ['sel0'] - -extralabel = {} -extralabel['sel0'] = "Selection: jet pt > 30GeV" - - -colors = {} -colors['HH_bbtautau'] = ROOT.kRed -colors['tt'] = ROOT.kBlue+1 - -plots = {} -plots['HH_bbtautau'] = {'signal':{'HH_bbtautau':['pwp8_pp_hh_lambda100_5f_hhbbaa']}, - 'backgrounds':{'tt':['mgp8_pp_tt012j_5f']} - } - -legend = {} -legend['HH_bbtautau'] = 'HH->bbtautau' -legend['tt'] = 'tt' - - - - diff --git a/examples/FCChh/HH_bbtautau/preSel.py b/examples/FCChh/HH_bbtautau/preSel.py deleted file mode 100644 index 992fdb3070..0000000000 --- a/examples/FCChh/HH_bbtautau/preSel.py +++ /dev/null @@ -1,26 +0,0 @@ -#python FCChhAnalyses/FCChh/ttHH/dataframe/preSel.py -from process import get_process_dict_dirs -import os - -basedir = os.path.join(get_process_dict_dirs()[0] + "yaml/FCC/fcc_v04/" -outdir="FCChh/HH_bbtautau/" -NUM_CPUS = 20 -process_list=['pwp8_pp_hh_lambda100_5f_hhbbaa', - #'mgp8_pp_bbtata_QED', - #'mgp8_pp_bbtata_QCDQED', - #'mg_pp_bbjj_QCD_5f', - 'mgp8_pp_tt012j_5f', - #'mg_pp_h012j_5f', - #'mg_pp_vh012j_5f', - #'mg_pp_ttw_5f', - #'mg_pp_ttww_4f', - #'mg_pp_ttwz_5f', - #'mgp8_pp_ttz_5f', - #'mgp8_pp_ttzz_5f', - #'mgp8_pp_tth01j_5f' - ] -fraction=1 - -import bin.runDataFrame as rdf -myana=rdf.runDataFrame(basedir,process_list) -myana.run(ncpu=NUM_CPUS,fraction=fraction,outDir=outdir) diff --git a/examples/FCChh/ggHH_bbyy/analysis_final.py b/examples/FCChh/ggHH_bbyy/analysis_final.py new file mode 100644 index 0000000000..41faab2498 --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/analysis_final.py @@ -0,0 +1,50 @@ +#Input directory where the files produced at the pre-selection level are +inputDir = "outputs/FCChh/ggHH_bbyy/presel/" + +#Input directory where the files produced at the pre-selection level are +outputDir = "outputs/FCChh/ggHH_bbyy/final/" + +processList = { + 'pwp8_pp_hh_5f_hhbbyy':{}, #output file from analysis_stage1.py +} + +#Link to the dictonary that contains all the cross section informations etc... +procDict = "/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/FCChh_procDict_tutorial.json" +#Note the numbeOfEvents and sumOfWeights are placeholders that get overwritten with the correct values in the samples + +#How to add a process that is not in the official dictionary: +# procDictAdd={"pwp8_pp_hh_5f_hhbbyy": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}} + +# Expected integrated luminosity +intLumi = 30e+06 # pb-1 + +# Whether to scale to expected integrated luminosity +doScale = True + +#Number of CPUs to use +nCPUS = 2 + +#produces ROOT TTrees, default is False +doTree = True + +saveTabular = True + +# Optional: Use weighted events +do_weighted = True + +# Dictionary of the list of cuts. The key is the name of the selection that will be added to the output file +cutList = { + "sel0_myy":"m_yy[0] > 100. && m_yy[0] < 180.", + "sel1_mbb":"(m_yy[0] > 100. && m_yy[0] < 180.) && (m_bb[0] > 80. && m_bb[0] < 200.)", + } + +# Dictionary for the output variable/histograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. +histoList = { + "myy":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":0,"xmax":200}, + "myy_zoom":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":100,"xmax":180}, + "mbb":{"name":"m_bb","title":"m_{bb} [GeV]","bin":50,"xmin":0,"xmax":250}, + "mbb_zoom":{"name":"m_bb","title":"m_{b} [GeV]","bin":50,"xmin":80,"xmax":200}, + "y1_pT":{"name":"g1_pt","title":"pT_{#gamma1} [GeV]","bin":50,"xmin":0.,"xmax":200.}, + "y2_pT":{"name":"g2_pt","title":"pT_{#gamma2} [GeV]","bin":50,"xmin":0.,"xmax":200.}, + "pT_y1_vs_y2_2D":{"cols":["g1_pt", "g2_pt"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram +} \ No newline at end of file diff --git a/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py new file mode 100644 index 0000000000..ac426b91df --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py @@ -0,0 +1,156 @@ +''' +Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events to check the Delphes b-tagging efficiencies +''' +from argparse import ArgumentParser + +# Mandatory: Analysis class where the user defines the operations on the +# dataframe. +class Analysis(): + ''' + Validation of Delphes b-tagging efficiencies in HH->bbyy events. + ''' + def __init__(self, cmdline_args): + parser = ArgumentParser( + description='Additional analysis arguments', + usage='Provide additional arguments after analysis script path') + # parser.add_argument('--bjet-pt', default='10.', type=float, + # help='Minimal pT of the selected b-jets.') + # Parse additional arguments not known to the FCCAnalyses parsers + # All command line arguments know to fccanalysis are provided in the + # `cmdline_arg` dictionary. + self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) + + # Mandatory: List of processes to run over + self.process_list = { + # # Add your processes like this: + ## '':{'fraction':, 'chunks':, 'output': }, + # # - needs to correspond either the name of the input .root file, or the name of a directory containing root files + # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments + # # or leave blank to use defaults = run the full statistics in one output file named the same as the process: + 'pwp8_pp_hh_5f_hhbbyy': {}, + } + + # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced + # samples (this points to the yaml files for getting sample statistics) + self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' + + # Optional: output directory, default is local running directory + self.output_dir = 'outputs/FCChh/ggHH_bbyy/nosel/' + + # Optional: analysisName, default is '' + # self.analysis_name = 'My Analysis' + + # Optional: number of threads to run on, default is 'all available' + # self.n_threads = 4 + + # Optional: running on HTCondor, default is False + # self.run_batch = False + + # Optional: Use weighted events + self.do_weighted = True + + # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) + self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ + 'tutorials/edm4hep_tutorial_data/' \ + 'pwp8_pp_hh_5f_hhbbyy.root' + + + # Mandatory: analyzers function to define the analysis graph, please make + # sure you return the dataframe, in this example it is dframe2 + def analyzers(self, dframe): + ''' + Analysis graph. + ''' + + dframe2 = ( + dframe + + .Define("weight", "EventHeader.weight") + + ########################################### JETS ########################################### + + #LOOSE WP + .Define("b_tagged_jets_loose", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 0)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_loose)") + .Define("px_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_loose)") + .Define("py_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_loose)") + .Define("pz_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_loose)") + .Define("E_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_loose)") + .Define("pT_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_loose)") + .Define("eta_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_loose)") + + #MEDIUM WP + .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_medium)") + .Define("px_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_medium)") + .Define("py_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_medium)") + .Define("pz_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_medium)") + .Define("E_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_medium)") + .Define("pT_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_medium)") + .Define("eta_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_medium)") + + #TIGHT WP + .Define("b_tagged_jets_tight", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 2)") #bit 2 = tight WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_tight)") + .Define("px_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_tight)") + .Define("py_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_tight)") + .Define("pz_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_tight)") + .Define("E_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_tight)") + .Define("pT_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_tight)") + .Define("eta_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_tight)") + + ########################################### MC PARTICLES ########################################### + + #all MC particles + .Define("mc_particles", "Particle") + .Alias("mc_parents", "_Particle_parents.index") + .Alias("mc_daughters", "_Particle_daughters.index") + + + ################################# Gen matched b-jets for b-tag eff study ########################################### + .Define("MC_b", "AnalysisFCChh::getBhadron(mc_particles,mc_parents)") + .Define("jets_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, Jet, 0.4)") + + .Define("bjets_loose_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_loose, 0.4)") + .Define("bjets_medium_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_medium, 0.4)") + .Define("bjets_tight_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_tight, 0.4)") + + #all genmatched + .Define("n_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(jets_genmatched_b)") + .Define("pT_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(jets_genmatched_b)") + .Define("eta_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(jets_genmatched_b)") + + #loose btag + .Define("n_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_loose_genmatched_b)") + .Define("pT_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_loose_genmatched_b)") + .Define("eta_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_loose_genmatched_b)") + + #medium btag + .Define("n_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_medium_genmatched_b)") + .Define("pT_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_medium_genmatched_b)") + .Define("eta_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_medium_genmatched_b)") + + #tight btag + .Define("n_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_tight_genmatched_b)") + .Define("pT_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_tight_genmatched_b)") + .Define("eta_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_tight_genmatched_b)") + + ) + return dframe2 + + # Mandatory: output function, please make sure you return the branch list + # as a python list + def output(self): + ''' + Output variables which will be saved to output root file. + ''' + branch_list = [ + 'weight', + #gen-matched b-jets to check efficiencies + 'n_b_jets_loose', 'n_b_jets_medium', 'n_b_jets_tight', + 'n_jets_genmatched_b', 'pT_jets_genmatched_b', 'eta_jets_genmatched_b', + 'n_bjets_loose_genmatched_b', 'pT_bjets_loose_genmatched_b', 'eta_bjets_loose_genmatched_b', + 'n_bjets_medium_genmatched_b', 'pT_bjets_medium_genmatched_b', 'eta_bjets_medium_genmatched_b', + 'n_bjets_tight_genmatched_b', 'pT_bjets_tight_genmatched_b', 'eta_bjets_tight_genmatched_b', + ] + return branch_list diff --git a/examples/FCChh/ggHH_bbyy/analysis_plots.py b/examples/FCChh/ggHH_bbyy/analysis_plots.py new file mode 100644 index 0000000000..137d863987 --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/analysis_plots.py @@ -0,0 +1,37 @@ +import ROOT + +# global parameters +intLumi = 30e+06 #in pb-1 +ana_tex = 'pp #rightarrow HH #rightarrow b #bar{b} #gamma #gamma ' +delphesVersion = '3.4.2' +energy = 100 +collider = 'FCC-hh' +inputDir = 'outputs/FCChh/ggHH_bbyy/final/' +formats = ['png','pdf'] +yaxis = ['lin','log'] +stacksig = ['nostack'] +# stacksig = ['stack','nostack'] +outdir = 'outputs/FCChh/ggHH_bbyy/plots/' +plotStatUnc = True + +variables = ['myy','myy_zoom', 'mbb', 'mbb_zoom', 'y1_pT','y2_pT'] + +# rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional) + +### Dictionary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection +selections = {} +selections['bbyy_analysis'] = ["sel0_myy","sel1_mbb"] + +extralabel = {} +extralabel['sel0_myy'] = "Sel.: 100 < m_{#gamma#gamma} < 180 GeV" +extralabel['sel1_mbb'] = "Sel.: 100 < m_{#gamma#gamma} < 180 GeV and 80 < m_{bb} < 200 GeV" + +colors = {} +colors['bbyy_signal'] = ROOT.kRed + +plots = {} +plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy']}, + } + +legend = {} +legend['bbyy_signal'] = 'HH' \ No newline at end of file diff --git a/examples/FCChh/ggHH_bbyy/analysis_stage1.py b/examples/FCChh/ggHH_bbyy/analysis_stage1.py new file mode 100644 index 0000000000..b9ddd7df82 --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/analysis_stage1.py @@ -0,0 +1,143 @@ +''' +Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events +''' +from argparse import ArgumentParser + + +# Mandatory: Analysis class where the user defines the operations on the +# dataframe. +class Analysis(): + ''' + Di-Higgs analysis in bbyy. + ''' + def __init__(self, cmdline_args): + parser = ArgumentParser( + description='Additional analysis arguments', + usage='Provide additional arguments after analysis script path') + # parser.add_argument('--bjet-pt', default='10.', type=float, + # help='Minimal pT of the selected b-jets.') + # Parse additional arguments not known to the FCCAnalyses parsers + # All command line arguments know to fccanalysis are provided in the + # `cmdline_arg` dictionary. + self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) + + # Mandatory: List of processes to run over + self.process_list = { + # # Add your processes like this: + ## '':{'fraction':, 'chunks':, 'output': }, + # # - needs to correspond either the name of the input .root file, or the name of a directory containing root files + # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments + # # or leave blank to use defaults = run the full statistics in one output file named the same as the process: + 'pwp8_pp_hh_5f_hhbbyy': {}, + } + + # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced + # samples (this points to the yaml files for getting sample statistics) + self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' + + # Optional: output directory, default is local running directory + self.output_dir = 'outputs/FCChh/ggHH_bbyy/presel/' + + # Optional: analysisName, default is '' + self.analysis_name = 'FCC-hh bbyy analysis' + + # Optional: number of threads to run on, default is 'all available' + # self.n_threads = 4 + + # Optional: running on HTCondor, default is False + # self.run_batch = False + + # Optional: Use weighted events + self.do_weighted = True + + # Optional: read the input files with podio::DataSource + self.use_data_source = False # explicitly use old way in this version + + # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) + self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ + 'tutorials/edm4hep_tutorial_data/' \ + 'pwp8_pp_hh_5f_hhbbyy.root' + + + # Mandatory: analyzers function to define the analysis graph, please make + # sure you return the dataframe, in this example it is dframe2 + def analyzers(self, dframe): + ''' + Analysis graph. + ''' + + dframe2 = ( + dframe + + ########################################### DEFINITION OF VARIABLES ########################################### + + # generator event weight + .Define("weight", "EventHeader.weight") + + ########################################### PHOTONS ########################################### + + .Define("gamma", "FCCAnalyses::ReconstructedParticle::get(Photon_objIdx.index, ReconstructedParticles)") + .Define("selpt_gamma", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(gamma)") + .Define("sel_gamma_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_gamma)") + .Define("sel_gamma", "AnalysisFCChh::SortParticleCollection(sel_gamma_unsort)") #sort by pT + + .Define("n_gamma", "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") + .Define("g1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[0]") + .Define("g1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[0]") + .Define("g1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[0]") + .Define("g1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[0]") + .Define("g2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[1]") + .Define("g2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[1]") + .Define("g2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[1]") + .Define("g2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[1]") + + # H(yy) if it exists, if there are no 2 selected photons, doesnt get filled + .Define("yy_pairs_unmerged", "AnalysisFCChh::getPairs(sel_gamma)") # retrieves the leading pT pair of all possible + .Define("yy_pairs", "AnalysisFCChh::merge_pairs(yy_pairs_unmerged)") # merge pair into one object to access inv masses etc + .Define("m_yy", "FCCAnalyses::ReconstructedParticle::get_mass(yy_pairs)") + + ########################################### JETS ########################################### + + # b-tagged jets at medium working point + .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + # select medium b-jets with pT > 30 GeV, |eta| < 4 + .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(b_tagged_jets_medium)") + .Define("sel_bjets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_bjets)") + .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)") #sort by pT + .Define("n_bjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)") + .Define("b1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[0]") + .Define("b1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[0]") + .Define("b1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[0]") + .Define("b1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[0]") + .Define("b2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[1]") + .Define("b2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[1]") + .Define("b2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[1]") + .Define("b2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[1]") + + # H(bb) system - using the medium WP jets - if there are no 2 b-tagged jets these variable don't get filled + .Define("bb_pairs_unmerged", "AnalysisFCChh::getPairs(sel_bjets)") # retrieves the leading pT pair of all possible + .Define("bb_pairs", "AnalysisFCChh::merge_pairs(bb_pairs_unmerged)") # merge pair into one object to access inv masses etc + .Define("m_bb", "FCCAnalyses::ReconstructedParticle::get_mass(bb_pairs)") + + ########################################### APPLY PRE-SELECTION ########################################### + # require at least two b-jets and two photons + .Filter("n_gamma > 1") + .Filter("n_bjets > 1") + + ) + return dframe2 + + # Mandatory: output function, please make sure you return the branch list + # as a python list + def output(self): + ''' + Output variables which will be saved to output root file. + ''' + branch_list = [ + 'weight', + # Photons and H(yy) system: + 'n_gamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy', + # b-jets and H(bb) system: + 'n_bjets', 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', 'm_bb', + ] + return branch_list \ No newline at end of file diff --git a/examples/FCChh/ggHH_bbyy/plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py new file mode 100644 index 0000000000..809d854661 --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py @@ -0,0 +1,199 @@ +from collections import namedtuple +import ROOT +import argparse +import os +import numpy as np + +ROOT.gROOT.SetBatch() +ROOT.gStyle.SetOptTitle(0) + +def getHfromRDF(hist): + h = None + t = hist.GetValue() + h = t.Clone() + return h + +def makePlots(dict_of_vars, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False): + + if not os.path.exists(out_dir_base): + os.mkdir(out_dir_base) + + rdf = ROOT.RDataFrame("events", input_filepath) + + if sel_cutstring: + rdf = rdf.Filter(sel_cutstring) #apply the selection + + if not rdf: + print("Empty file for:", input_filepath, " Exiting.") + return + + for plot_name, plot in dict_of_vars.items(): + print("Plotting:", plot_name) + + has_variable_binning = False + if not isinstance(plot.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(plot.nbins)-1 + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax) + + tmp_hist = rdf.Histo1D(model, plot.name) + if not tmp_hist.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + + tmp_hist.GetXaxis().SetTitle(plot.label) + tmp_hist.GetYaxis().SetTitle("Raw MC events") + tmp_hist.SetFillColor(ROOT.kCyan+2) + tmp_hist.SetLineColor(ROOT.kCyan+2) + + #write out + if sel_cutstring == "": + filename = "bbtautau_noSel_"+plot_name+out_format + else: + filename = "bbtautau_withSel_"+plot_name+out_format + fileout = os.path.join(out_dir_base, filename) + + canvas = ROOT.TCanvas("canvas", "canvas", 800, 800) + tmp_hist.Draw("hist same") + canvas.SaveAs(fileout) + +def makeEffPlots(dict_of_vars, ref_var, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False): + + if not os.path.exists(out_dir_base): + os.mkdir(out_dir_base) + + rdf = ROOT.RDataFrame("events", input_filepath) + + if sel_cutstring: + rdf = rdf.Filter(sel_cutstring) #apply the selection + + if not rdf: + print("Empty file for:", input_filepath, " Exiting.") + return + + #denominator hist + print("Plotting at denominator:", ref_var.name) + has_variable_binning = False + + if not isinstance(ref_var.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(ref_var.nbins)-1 + model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, ref_var.nbins, ref_var.xmin, ref_var.xmax) + + tmp_hist_den = rdf.Histo1D(model, ref_var.name) + + if not tmp_hist_den.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + else: + print("Number entries denumerator: ", tmp_hist_den.GetEntries()) + tmp_den = getHfromRDF(tmp_hist_den) + #tmp_hist.GetXaxis().SetTitle(plot.label) + #tmp_hist.GetYaxis().SetTitle("Raw MC events") + #tmp_hist.SetFillColor(ROOT.kCyan+2) + #tmp_hist.SetLineColor(ROOT.kCyan+2) + + for plot_name, plot in dict_of_vars.items(): + print("Plotting:", plot_name) + + has_variable_binning = False + if not isinstance(plot.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(plot.nbins)-1 + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax) + + tmp_hist_num = rdf.Histo1D(model, plot.name) + + if not tmp_hist_num.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + else: + print("Number entries numerator: ", tmp_hist_num.GetEntries()) + + tmp_num = getHfromRDF(tmp_hist_num) + + if ROOT.TEfficiency.CheckConsistency(tmp_num, tmp_den): + Eff = ROOT.TEfficiency(tmp_num, tmp_den) + Eff.SetTitle(";"+plot.label+";Efficiency") + Eff.SetTitle(plot_name) + ptext = ROOT.TPaveText(.7,.85,.85,.9,option='NDC') + if "loose" in plot_name: + ptext.AddText("loose tag") + elif "medium" in plot_name: + ptext.AddText("medium tag") + elif "tight" in plot_name: + ptext.AddText("tight tag") + else: + ptext.AddText(plot_name) + #write out + if sel_cutstring == "": + filename = "bbtautau_noSel_Eff"+plot_name+out_format + else: + filename = "bbtautau_withSel_Eff"+plot_name+out_format + fileout = os.path.join(out_dir_base, filename) + + canvas = ROOT.TCanvas("canvas", "canvas", 900, 700) + canvas.SetGrid() + Eff.Draw("AP") + canvas.Update() + graph = Eff.GetPaintedGraph() + graph.SetMinimum(0.6) + graph.SetMaximum(1) + canvas.Update() + canvas.SetLogx() + ptext.Draw() + canvas.SaveAs(fileout) + else: + print("The histograms are not consistent!") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Plot b-tagging efficiencies from Delphes") + parser.add_argument('--input', '-i', metavar="INPUTDIR", dest="inFile", required=True, help="Path to the input file.") + parser.add_argument('--outdir', '-o', metavar="OUTPUTDIR", dest="outDir", required=True, help="Output directory.") + args = parser.parse_args() + + #use a custom namedntuple to transfer the plotting info + PlotSpecs = namedtuple('PlotSpecs', ['name', 'xmin', 'xmax', 'label', 'nbins']) + + #plot some variables without any selection: + bbyy_plots_nosel = { + "n_b_jets_loose":PlotSpecs(name="n_b_jets_loose", xmin=0, xmax=5, label="n_b_jets_loose", nbins=5), + "n_b_jets_medium":PlotSpecs(name="n_b_jets_medium", xmin=0, xmax=5, label="n_b_jets_medium", nbins=5), + "n_b_jets_tight":PlotSpecs(name="n_b_jets_tight", xmin=0, xmax=5, label="n_b_jets_tight", nbins=5), + "n_jets_genmatched_b": PlotSpecs(name="n_jets_genmatched_b", xmin=0, xmax=5, label="n_jets_genmatched_b", nbins=5), + "n_bjets_loose_genmatched_b": PlotSpecs(name="n_bjets_loose_genmatched_b", xmin=0, xmax=5, label="n_bjets_loose_genmatched_b", nbins=5), + "n_bjets_medium_genmatched_b": PlotSpecs(name="n_bjets_medium_genmatched_b", xmin=0, xmax=5, label="n_bjets_medium_genmatched_b", nbins=5), + "n_bjets_tight_genmatched_b": PlotSpecs(name="n_bjets_tight_genmatched_b", xmin=0, xmax=5, label="n_bjets_tight_genmatched_b", nbins=5), + } + + sel_cuts_empty = "" + makePlots(bbyy_plots_nosel, sel_cuts_empty, args.inFile, args.outDir) + + #bjet tagging efficiencies: + binedges=[20,50,100,200,500,1000,5000] #same pT binning as in the delphes card + bbyy_plot_pTeff={ + "pT_bjets_loose_genmatched_b":PlotSpecs(name="pT_bjets_loose_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + "pT_bjets_medium_genmatched_b":PlotSpecs(name="pT_bjets_medium_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + "pT_bjets_tight_genmatched_b":PlotSpecs(name="pT_bjets_tight_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + } + #select only one eta bin: + sel_cuts_eta_4 = "n_jets_genmatched_b == 2 && abs(eta_jets_genmatched_b[0]) < 4. && abs(eta_jets_genmatched_b[1]) < 4." + + pT_ref = PlotSpecs(name="pT_jets_genmatched_b", xmin=0., xmax=1000., label="p_{T} #tau_{h} [GeV]", nbins=binedges) + eta_ref = PlotSpecs(name="eta_jets_genmatched_b", xmin=-5, xmax=5., label="#eta #tau_{h}", nbins=20) + + makeEffPlots(bbyy_plot_pTeff, pT_ref, sel_cuts_eta_4, args.inFile, args.outDir) + +# run it with: +# python plot_tag_eff.py -i -o diff --git a/examples/FCChh/ttHH/analysis.py b/examples/FCChh/ttHH/analysis.py deleted file mode 100644 index 603c6773a9..0000000000 --- a/examples/FCChh/ttHH/analysis.py +++ /dev/null @@ -1,71 +0,0 @@ -import sys -import ROOT - -print ("Load cxx analyzers ... ",) -ROOT.gSystem.Load("libedm4hep") -ROOT.gSystem.Load("libpodio") -ROOT.gSystem.Load("libFCCAnalyses") -ROOT.gErrorIgnoreLevel = ROOT.kFatal -_edm = ROOT.edm4hep.ReconstructedParticleData() -_pod = ROOT.podio.ObjectID() -_fcc = ROOT.getMC_px - -print ('edm4hep ',_edm) -print ('podio ',_pod) -print ('fccana ',_fcc) - -class analysis(): - - #__________________________________________________________ - def __init__(self, inputlist, outname, ncpu): - self.outname = outname - if ".root" not in outname: - self.outname+=".root" - - ROOT.ROOT.EnableImplicitMT(ncpu) - - self.df = ROOT.RDataFrame("events", inputlist) - print (" done") - #__________________________________________________________ - def run(self): - - df2 = (self.df - .Define("selected_jets", "selRP_pT(50.)(Jet)") - .Define("jet_pT", "getRP_pt(Jet)") - .Define("seljet_pT", "getRP_pt(selected_jets)") - - ) - - # select branches for output file - branchList = ROOT.vector('string')() - for branchName in [ - "jet_pT", - "seljet_pT", - - ]: - branchList.push_back(branchName) - df2.Snapshot("events", self.outname, branchList) - -# example call for standalone file -# python FCChhAnalyses/FCChh/ttHH/dataframe/analysis.py /eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v04/mgp8_pp_tthh_lambda100_5f/events_152512217.root - -if __name__ == "__main__": - - if len(sys.argv)==1: - print ("usage:") - print ("python ",sys.argv[0]," file.root") - sys.exit(3) - infile = sys.argv[1] - outDir = 'FCChh/'+sys.argv[0].split('/')[2]+'/' - import os - os.system("mkdir -p {}".format(outDir)) - outfile = outDir+infile.split('/')[-1] - ncpus = 0 - analysis = analysis(infile, outfile, ncpus) - analysis.run() - - tf = ROOT.TFile(infile) - entries = tf.events.GetEntries() - p = ROOT.TParameter(int)( "eventsProcessed", entries) - outf=ROOT.TFile(outfile,"UPDATE") - p.Write() diff --git a/examples/FCChh/ttHH/finalSel.py b/examples/FCChh/ttHH/finalSel.py deleted file mode 100644 index 0e609e41bd..0000000000 --- a/examples/FCChh/ttHH/finalSel.py +++ /dev/null @@ -1,47 +0,0 @@ -#python FCChhAnalyses/FCChh/ttHH/dataframe/finalSel.py -import sys, os -import ROOT - -###Input directory where the files produced at the pre-selection level are -baseDir = "FCChh/ttHH/" - -###Link to the dictonary that contains all the cross section informations etc... -procDict = 'FCC_procDict_fcc_v04.json' - -process_list=['mgp8_pp_tthh_lambda100_5f', - 'mgp8_pp_ttz_5f', - 'mgp8_pp_ttzz_5f', - 'mgp8_pp_tth01j_5f' - ] - -###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file -cut_list = {"sel0":"seljet_pT.size()>6", - } - -###Optinally Define new variables -define_list = {"seljet_pT_0":"seljet_pT.at(0)", - "seljet_pT_1":"seljet_pT.at(1)", - "seljet_pT_2":"seljet_pT.at(2)", - "seljet_pT_3":"seljet_pT.at(3)", - "seljet_pT_4":"seljet_pT.at(4)", - "seljet_pT_5":"seljet_pT.at(5)"} - -###Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. -variables = { - "jet_pt_0":{"name":"seljet_pT_0","title":"Leading jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_1":{"name":"seljet_pT_1","title":"Sub leading jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_2":{"name":"seljet_pT_2","title":"3rd jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_3":{"name":"seljet_pT_3","title":"4th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_4":{"name":"seljet_pT_4","title":"5th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, - "jet_pt_5":{"name":"seljet_pT_5","title":"6th jet p_{T} [GeV]","bin":100,"xmin":0,"xmax":4000}, -} - -###Number of CPUs to use -NUM_CPUS = 5 - -###This part is standard to all analyses -sys.path.append('./bin') -import runDataFrameFinal as rdf -#import bin.runDataFrameFinal as rdf -myana=rdf.runDataFrameFinal(baseDir,procDict,process_list,cut_list,variables,defines=define_list) -myana.run(ncpu=NUM_CPUS) diff --git a/examples/FCChh/ttHH/plots.py b/examples/FCChh/ttHH/plots.py deleted file mode 100644 index a3d9cc7b36..0000000000 --- a/examples/FCChh/ttHH/plots.py +++ /dev/null @@ -1,47 +0,0 @@ -import ROOT - -# global parameters -intLumi = 30e+06 #in pb-1 -ana_tex = "ttHH inclusive" -delphesVersion = "3.4.2" -energy = 100 -collider = "FCC-hh" -inputDir = "FCChh/ttHH/" -formats = ['png','pdf'] -yaxis = ['lin','log'] -stacksig = ['stack','nostack'] -outdir = 'FCChh/ttHH/plots/' - -variables = ['jet_pt_0','jet_pt_1','jet_pt_2','jet_pt_3','jet_pt_4','jet_pt_5'] - -###Dictonnary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection -selections = {} -selections['ttHH'] = ['sel0'] - -extralabel = {} -extralabel['sel0'] = "Selection: jet pt > 30GeV" - - -colors = {} -colors['ttHH'] = ROOT.kRed -colors['ttH'] = ROOT.kBlue+1 -colors['ttZ'] = ROOT.kGreen+2 -colors['ttZZ'] = ROOT.kGreen+3 - -plots = {} -plots['ttHH'] = {'signal':{'ttHH':['mgp8_pp_tthh_lambda100_5f']}, - 'backgrounds':{'ttZ':['mgp8_pp_ttz_5f'], - 'ttZZ':['mgp8_pp_ttzz_5f'], - 'ttH':['mgp8_pp_tth01j_5f']} - } - - -legend = {} -legend['ttHH'] = 'ttHH' -legend['ttH'] = 'ttH' -legend['ttZ'] = 'ttZ' -legend['ttZZ'] = 'ttZZ' - - - - diff --git a/examples/FCChh/ttHH/preSel.py b/examples/FCChh/ttHH/preSel.py deleted file mode 100644 index e6ae27f4ec..0000000000 --- a/examples/FCChh/ttHH/preSel.py +++ /dev/null @@ -1,18 +0,0 @@ -#python FCChhAnalyses/FCChh/ttHH/dataframe/preSel.py - -import os -from process import get_process_dict_dirs - -basedir = os.path.join(get_process_dict_dirs()[0], "yaml/FCC/fcc_v04/") -outdir="FCChh/ttHH/" -NUM_CPUS = 20 -process_list=['mgp8_pp_tthh_lambda100_5f', - 'mgp8_pp_ttz_5f', - 'mgp8_pp_ttzz_5f', - 'mgp8_pp_tth01j_5f' - ] -fraction=1 - -import bin.runDataFrame as rdf -myana=rdf.runDataFrame(basedir,process_list) -myana.run(ncpu=NUM_CPUS,fraction=fraction,outDir=outdir) diff --git a/examples/FCChh/tth_4l/CMakeLists.txt b/examples/FCChh/tth_4l/CMakeLists.txt deleted file mode 100644 index 313810c62c..0000000000 --- a/examples/FCChh/tth_4l/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ - -add_executable(fcchh_ana_tth_4l fcchh_ana_tth_4l.cxx) -target_link_libraries(fcchh_ana_tth_4l ${FCCEDM_LIBRARIES} ROOT::ROOTDataFrame ROOT::Physics FCCAnalyses) -target_include_directories(fcchh_ana_tth_4l PUBLIC ${FCCEDM_INCLUDE_DIRS} ${ROOT_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/analyzers/dataframe) diff --git a/examples/FCChh/tth_4l/Makefile b/examples/FCChh/tth_4l/Makefile deleted file mode 100644 index f6fdca479e..0000000000 --- a/examples/FCChh/tth_4l/Makefile +++ /dev/null @@ -1,19 +0,0 @@ - - -# build the reproducer of the heppy analysis with TDataFrame -# todo: cleaner directory structure, cmake, etc - -all: tdataframe_analysis - -tdataframe_analysis: fcc_ana_tth_4l.cxx - g++ -g -o fcc_ana_tth_4l fcc_ana_tth_4l.cxx `root-config --cflags --glibs` -lROOTVecOps -lROOTDataFrame - -tdataframe_analysis_mt: fcc_ana_tth_4l.cxx - g++ -g -o fcc_ana_tth_4l fcc_ana_tth_4l.cxx `root-config --cflags --glibs` -lROOTVecOps -lROOTDataFrame -DENABLEIMPLICITMT - -# run the analysis - -run: - ./fcc_ana_tth_4l tree.root root://eospublic.cern.ch//eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v02/mgp8_pp_tth01j_5f_hllll/events_000000830.root - -# todo: comparison with expected output (/afs/cern.ch/work/v/vavolkl/public/heppy_analysis_samples/localtest/mgp8_pp_tth01j_5f_hllll_1/heppy.FCChhAnalyses.FCChh.tth_4l.TreeProducer.TreeProducer_1/tree.root) diff --git a/examples/FCChh/tth_4l/fcchh_ana_tth_4l.cxx b/examples/FCChh/tth_4l/fcchh_ana_tth_4l.cxx deleted file mode 100644 index 9f0bb29532..0000000000 --- a/examples/FCChh/tth_4l/fcchh_ana_tth_4l.cxx +++ /dev/null @@ -1,101 +0,0 @@ - -#include -#include "TLorentzVector.h" -#include - -// FCC event datamodel includes -#include "datamodel/ParticleData.h" -#include "datamodel/LorentzVector.h" -#include "datamodel/JetData.h" -#include "datamodel/TaggedParticleData.h" -#include "datamodel/TaggedJetData.h" -// Legacy class -#include "datamodel/FloatData.h" - -#include "FCCAnalyses.h" - - -auto _m = fcc::ParticleData(); - - - - -// Reproduce Heppy analysis -int main(int argc, char* argv[]){ - - - #ifdef ENABLEIMPLICITMT - ROOT::EnableImplicitMT(); - #endif - - // fcc edm libraries - gSystem->Load("libdatamodel"); - - // very basic command line argument parsing - if (argc < 3) { - std::cout << "error: need to specify fcc data files to analyze as command line arguments" << std::endl; - std::cout << "usage: fccanalysis_tth_4l outfilename.root datafile1.root datafile2.root ... datafileN.root " << std::endl; - return 1; - } - std::cout << "Read files... "; - std::vector filenames; - - std::string outfilename = argv[1]; - for (int i = 2; i < argc; ++i) { - std::cout << " " << argv[i]; - filenames.push_back(argv[i]); - } - std::cout << std::endl; - - std::cout << "Creating TDataFrame ..." << std::endl; - ROOT::RDataFrame df("events", filenames); - - - std::cout << "Apply selectors and define new branches ..." << std::endl; - auto selectors = df - .Define("selected_electrons", selectParticlesPtIso(20, 0.4), {"electrons", "electronITags"}) - .Define("selected_muons", selectParticlesPtIso(20, 0.4), {"muons", "muonITags"}) - .Define("selected_leptons", mergeElectronsAndMuons, {"selected_electrons", "selected_muons"}) - .Define("zeds", LeptonicZBuilder, {"selected_leptons"}) - .Define("selected_leptons_pt", get_pt, {"selected_leptons"}) - .Define("zeds_pt", get_pt, {"zeds"}) - .Define("higgs", LeptonicHiggsBuilder, {"zeds"}) - .Define("higgs_m", get_mass, {{"higgs"}}) - .Define("higgs_pt", get_pt, {"higgs"}) - .Define("jets_30_bs", selectJets(30, true), {"pfjets04", "pfbTags04"}) - .Define("jets_30_lights", selectJets(30, false), {"pfjets04", "pfbTags04"}) - .Define("selected_bs", noMatchJets(0.2), {"jets_30_bs", "selected_leptons"}) - .Define("selected_lights", noMatchJets(0.2), {"jets_30_lights", "selected_leptons"}) - .Define("nbjets", get_njets, {"selected_bs"}) - .Define("njets", get_njets2, {"selected_bs", "selected_lights"}) - .Define("weight", id_float_legacy, {"mcEventWeights"}) - .Define("n_selected_leptons", get_nparticles, {"selected_leptons"}) - ; - auto nentries = selectors.Count(); - std::cout << "Count events: " << *nentries << std::endl; - std::cout << "Writing snapshot to disk ... \t" << outfilename << std::endl; - selectors.Snapshot("events", outfilename, - { - // fcc particles with additional infos - /* - "zeds", - "zeds_pt", - "selected_muons", - "selected_leptons", - "selected_electrons", - "selected_bs", - "selected_lights", - "higgs", - */ - "selected_leptons_pt", - "higgs_pt", - "higgs_m", - "nbjets", - "njets", - "weight" - - } - ); - - return 0; -} diff --git a/examples/FCChh/tth_4l/fcchh_ana_tth_4l.py b/examples/FCChh/tth_4l/fcchh_ana_tth_4l.py deleted file mode 100644 index c64e7fb839..0000000000 --- a/examples/FCChh/tth_4l/fcchh_ana_tth_4l.py +++ /dev/null @@ -1,71 +0,0 @@ - -import sys -import ROOT - -print "Load cxx analyzers ... ", -ROOT.gSystem.Load("libdatamodel") -ROOT.gSystem.Load("libFCCAnalyses") -print "" - - -_p = ROOT.fcc.ParticleData() -_s = ROOT.selectParticlesPtIso -print _p - - - - -getPt_code =''' -#include "datamodel/ParticleData.h" -#include "datamodel/TaggedParticleData.h" -using namespace ROOT::VecOps; -RVec getPt(const RVec &particles) -{ - auto pt = [](const fcc::ParticleData &p) { return sqrt(pow(p.core.p4.px,2) + pow(p.core.p4.py,2)); }; - return Map(particles, pt); -} -''' -ROOT.gInterpreter.Declare(getPt_code) - - - -print "Create dataframe object from ", -fileListRoot = ROOT.vector('string')() -for fileName in [sys.argv[1]]: - fileListRoot.push_back(fileName) - print fileName, " ", -print " ..." -df = ROOT.RDataFrame("events", fileListRoot) -print " done" - -df2 = df.Define("selected_electrons", "selectParticlesPtIso(20, 0.4)(electrons, electronITags)") \ - .Define("selected_muons", "selectParticlesPtIso(20, 0.4)(muons, muonITags)") \ - .Define("selected_leptons", "mergeElectronsAndMuons(selected_electrons, selected_muons)") \ - .Define("zeds", "LeptonicZBuilder(selected_leptons)") \ - .Define("selected_leptons_pt", "get_pt(selected_leptons)") \ - .Define("zeds_pt", "get_pt(zeds)") \ - .Define("higgs", "LeptonicHiggsBuilder(zeds)") \ - .Define("higgs_m", "get_mass(higgs)") \ - .Define("higgs_pt", "getPt(higgs)") \ - .Define("jets_30_bs", "selectJets(30, true)(pfjets04, pfbTags04)") \ - .Define("jets_30_lights", "selectJets(30, false)(pfjets04, pfbTags04)") \ - .Define("selected_bs", "noMatchJets(0.2)(jets_30_bs, selected_leptons)") \ - .Define("selected_lights", "noMatchJets(0.2)(jets_30_lights, selected_leptons)") \ - .Define("nbjets", "get_njets(selected_bs)") \ - .Define("njets", "get_njets2(selected_bs, selected_lights)") \ - .Define("weight"," id_float_legacy(mcEventWeights)") \ - .Define("n_selected_electrons", "get_nparticles(electrons)") \ - - - -branchList = ROOT.vector('string')() -for branchName in [ - "n_selected_electrons", - "higgs_pt", - "higgs_m", - "nbjets", - "njets", - "weight", - ]: - branchList.push_back(branchName) -df2.Snapshot("events", "tree.root", branchList) diff --git a/examples/FCChh/tth_4l/run.py b/examples/FCChh/tth_4l/run.py deleted file mode 100644 index a78bc37024..0000000000 --- a/examples/FCChh/tth_4l/run.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Run the FCChh tth_4l tdataframe analysis on all samples. -""" - - - -import os -import subprocess -import sys - -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in xrange(0, len(l), n): - yield l[i:i + n] - - -# expects processname as command line argument, p.ex m -process = sys.argv[1] - -filelist = subprocess.check_output(['eos ls /eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v02/' + process + '/events_00*.root', ""], shell=True) - -filechunks = [e for e in chunks(filelist.split("\n"),10) if e] - -NUM_CPUS = 15 # "None" defaults to all available - -def worker(f1, f2): - - # bash command to run analysis - os.system("fcc_ana_tth_4l " + "tree_" + f1[0].split("events_")[1] + " " + " ".join("root://eospublic//eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v02/"+ process +"/" + _f for _f in f1)) - -def run(pool): - f2 = None # not used. TODO: Remove - for f1 in filechunks: - pool.apply_async(worker, args=(f1,f2)) - -if __name__ == "__main__": - import multiprocessing as mp - pool = mp.Pool(NUM_CPUS) - run(pool) - pool.close() - pool.join() diff --git a/man/man7/fccanalysis-script.7 b/man/man7/fccanalysis-script.7 index ee41d1fcc2..fc35fe9435 100644 --- a/man/man7/fccanalysis-script.7 +++ b/man/man7/fccanalysis-script.7 @@ -175,6 +175,13 @@ If \fITrue\fR the events will be loaded through the EDM4hep RDataSource\&. .br Default value: False .TP +\fBdo_weighted\fR (optional) +Whether to use weighted or raw events\&. +If \fITrue\fR the events will be weighted with EDM4hep's EventHeader.weight +and all normalisation factors calculated with sum of weights accordingly\&. +.br +Default value: False +.TP .B procDict This variable controls which process dictionary will be used. It can be either simple file name, absolute path or url. In the case of simple filename, the file diff --git a/python/process.py b/python/process.py index b1512a8ce6..e521eccd19 100644 --- a/python/process.py +++ b/python/process.py @@ -7,9 +7,11 @@ import json import glob import logging +from typing import Optional import urllib.request import yaml # type: ignore import ROOT # type: ignore +import cppyy ROOT.gROOT.SetBatch(True) @@ -33,6 +35,70 @@ def get_entries(inpath: str) -> int | None: return nevents +def get_entries_sow(infilepath: str, nevents_max: Optional[int] = None, get_local: bool = True, weight_name: str = "EventHeader.weight") -> tuple[int, int, float, float]: + ''' + Get number of original entries and number of actual entries in the file, as well as the sum of weights + ''' + + infile = ROOT.TFile.Open(infilepath) + infile.cd() + + processEvents = 0 + processSumOfWeights = 0. + try: + processEvents = infile.Get('eventsProcessed').GetVal() + except AttributeError: + print('----> Warning: Input file is missing information about ' # should these warnings be kept? + 'original number of events!') + try: + processSumOfWeights = infile.Get('SumOfWeights').GetVal() + except AttributeError: + print('----> Warning: Input file is missing information about ' + 'original sum of weights!') + + if not get_local: + return processEvents, None, processSumOfWeights, None + + eventsTTree = 0 + sumOfWeightsTTree = 0. + + # check for empty chunk (can this be improved? exception from RDF cannot be caught it seems?) + tree =infile.Get("events") + if not tree: + print("Tree not found in file", infilepath, " possibly empty chunk - continuing with next one.") + infile.Close() + return processEvents, eventsTTree, processSumOfWeights, sumOfWeightsTTree + + try: + + #use a RDF here too so the nevents restriction option can be imposed easily for the local events + rdf_tmp = ROOT.ROOT.RDataFrame("events", infilepath) + + if nevents_max: + rdf_tmp = rdf_tmp.Range(0, nevents_max) + + eventsTTree = rdf_tmp.Count().GetValue() + + # eventsTTree = infile.Get("events").GetEntries() + ROOT.gROOT.SetBatch(True) + try: + # infile.Get("events").Draw('EventHeader.weight[0]>>histo') + # histo=ROOT.gDirectory.Get('histo') + histo = rdf_tmp.Histo1D(weight_name) + sumOfWeightsTTree=float(eventsTTree)*histo.GetMean() + except cppyy.gbl.std.runtime_error: + LOGGER.error('Error: Event weights requested with do_weighted,' + 'but input file does not contain weight column. Aborting.') + sys.exit(3) + except AttributeError: + print('----> Error: Input file is missing events TTree! Probably empty chunk.') + infile.Close() + + infile.Close() + + return processEvents, eventsTTree, processSumOfWeights, sumOfWeightsTTree + + def get_process_info(process: str, prod_tag: str, input_dir: str) -> tuple[list[str], list[int]]: diff --git a/python/run_fccanalysis.py b/python/run_fccanalysis.py index 5d222f125b..97dd0d73ac 100644 --- a/python/run_fccanalysis.py +++ b/python/run_fccanalysis.py @@ -14,7 +14,7 @@ import ROOT # type: ignore from anascript import get_element, get_element_dict, get_attribute -from process import get_process_info +from process import get_process_info, get_entries_sow from frame import generate_graph LOGGER = logging.getLogger('FCCAnalyses.run') @@ -265,6 +265,10 @@ def merge_config(args: object, analysis: object) -> dict[str, any]: config['use_data_source'] = True if get_attribute(analysis, 'use_data_source', False): config['use_data_source'] = True + # Check whether to use event weights (only supported as analysis config file option, not command line!) + config['do_weighted'] = False + if get_attribute(analysis, 'do_weighted', False): + config['do_weighted'] = True return config @@ -355,6 +359,9 @@ def run_rdf(config: dict[str, any], try: evtcount_init = dframe2.Count() + sow_init = evtcount_init + if config['do_weighted']: + sow_init = dframe2.Sum("EventHeader.weight") dframe3 = analysis.analyzers(dframe2) @@ -364,6 +371,9 @@ def run_rdf(config: dict[str, any], branch_list.push_back(bname) evtcount_final = dframe3.Count() + sow_final = evtcount_final + if config['do_weighted']: + sow_final = dframe3.Sum("EventHeader.weight") # Generate computational graph of the analysis if args.graph: @@ -375,7 +385,7 @@ def run_rdf(config: dict[str, any], 'occurred:\n%s', excp) sys.exit(3) - return evtcount_init.GetValue(), evtcount_final.GetValue() + return evtcount_init.GetValue(), evtcount_final.GetValue(), sow_init.GetValue(), sow_final.GetValue() # _____________________________________________________________________________ @@ -506,6 +516,12 @@ def run_local(config: dict[str, any], nevents_orig = 0 # The amount of events in the input file(s) nevents_local = 0 + + # Same for the sum of weights + if config['do_weighted']: + sow_orig = 0. + sow_local = 0. + for filepath in infile_list: if not config['use_data_source']: @@ -513,32 +529,54 @@ def run_local(config: dict[str, any], file_list.push_back(filepath) info_msg += f'- {filepath}\t\n' - infile = ROOT.TFile.Open(filepath, 'READ') - try: - nevents_orig += infile.Get('eventsProcessed').GetVal() - except AttributeError: - pass - try: - nevents_local += infile.Get("events").GetEntries() - except AttributeError: - LOGGER.error('Input file:\n%s\nis missing events TTree!\n' - 'Aborting...', filepath) + if config['do_weighted']: + # Adjust number of events in case --nevents was specified + if args.nevents > 0: + nevts_param, nevts_tree, sow_param, sow_tree = get_entries_sow(filepath, args.nevents) + else: + nevts_param, nevts_tree, sow_param, sow_tree = get_entries_sow(filepath) + + nevents_orig += nevts_param + nevents_local += nevts_tree + sow_orig += sow_param + sow_local += sow_tree + + else: + infile = ROOT.TFile.Open(filepath, 'READ') + try: + nevents_orig += infile.Get('eventsProcessed').GetVal() + except AttributeError: + pass + + try: + nevents_local += infile.Get("events").GetEntries() + except AttributeError: + LOGGER.error('Input file:\n%s\nis missing events TTree!\n' + 'Aborting...', filepath) + infile.Close() + sys.exit(3) infile.Close() - sys.exit(3) - infile.Close() - LOGGER.info(info_msg) + # Adjust number of events in case --nevents was specified + if args.nevents > 0 and args.nevents < nevents_local: + nevents_local = args.nevents - # Adjust number of events in case --nevents was specified - if args.nevents > 0 and args.nevents < nevents_local: - nevents_local = args.nevents + LOGGER.info(info_msg) + + if nevents_orig > 0: LOGGER.info('Number of events:\n\t- original: %s\n\t- local: %s', f'{nevents_orig:,}', f'{nevents_local:,}') + if config['do_weighted']: + LOGGER.info('Sum of weights:\n\t- original: %s\n\t- local: %s', + f'{sow_orig:,}', f'{sow_local:,}') else: LOGGER.info('Number of local events: %s', f'{nevents_local:,}') + if config['do_weighted']: + LOGGER.info('Local sum of weights: %s', f'{sow_local:0,.2f}') + output_dir = get_attribute(analysis, 'output_dir', '') if not args.batch: @@ -552,7 +590,7 @@ def run_local(config: dict[str, any], # Run RDF start_time = time.time() - inn, outn = run_rdf(config, args, analysis, file_list, outfile_path) + inn, outn, in_sow, out_sow = run_rdf(config, args, analysis, file_list, outfile_path) elapsed_time = time.time() - start_time # replace nevents_local by inn = the amount of processed events @@ -568,6 +606,13 @@ def run_local(config: dict[str, any], info_msg += f'\nReduction factor local: {outn/inn}' if nevents_orig > 0: info_msg += f'\nReduction factor total: {outn/nevents_orig}' + if config['do_weighted']: + info_msg += f'\nTotal sum of weights processed: {float(in_sow):0,.2f}' + info_msg += f'\nNo. result weighted events : {float(out_sow):0,.2f}' + if in_sow > 0: + info_msg += f'\nReduction factor local, weighted: {float(out_sow/in_sow):0,.4f}' + if sow_orig > 0: + info_msg += f'\nReduction factor total, weighted: {float(out_sow/sow_orig):0,.4f}' info_msg += '\n' info_msg += 80 * '=' info_msg += '\n' @@ -580,8 +625,16 @@ def run_local(config: dict[str, any], 'eventsProcessed', nevents_orig if nevents_orig != 0 else inn) param.Write() - param = ROOT.TParameter(int)('eventsSelected', outn) + param = ROOT.TParameter(int)('eventsSelected', outn) param.Write() + + if config['do_weighted']: + param_sow = ROOT.TParameter(float)( + 'SumOfWeights', + sow_orig if sow_orig != 0 else in_sow ) + param_sow.Write() + param_sow = ROOT.TParameter(float)('SumOfWeightsSelected', out_sow) # No of weighted, selected events + param_sow.Write() outfile.Write() if args.bench: @@ -633,6 +686,9 @@ def run_fccanalysis(args, analysis_module): if output_dir_eos is not None and not os.path.exists(output_dir_eos): os.system(f'mkdir -p {output_dir_eos}') + if config['do_weighted']: + LOGGER.info('Using generator weights') + # Check if test mode is specified, and if so run the analysis on it (this # will exit after) if args.test: @@ -672,6 +728,8 @@ def run_fccanalysis(args, analysis_module): 'analysis script!\nAborting...') sys.exit(3) + + for process_name in process_list: LOGGER.info('Started processing sample "%s" ...', process_name) file_list, event_list = get_process_info(process_name, diff --git a/python/run_final_analysis.py b/python/run_final_analysis.py index 7eaa7740b0..f439210830 100644 --- a/python/run_final_analysis.py +++ b/python/run_final_analysis.py @@ -15,7 +15,7 @@ import ROOT # type: ignore import cppyy from anascript import get_element, get_attribute -from process import get_process_dict +from process import get_process_dict, get_entries_sow from frame import generate_graph LOGGER = logging.getLogger('FCCAnalyses.run_final') @@ -230,6 +230,14 @@ def run(rdf_module, args) -> None: file_list = {} results = {} + # Check if using weighted events is requested + do_weighted = get_attribute(rdf_module, 'do_weighted', False) + + if do_weighted: + LOGGER.info('Using generator weights') + sow_process = process_events.copy() + sow_ttree = events_ttree.copy() + # Checking input directory input_dir = get_attribute(rdf_module, 'inputDir', '') if not input_dir: @@ -265,27 +273,50 @@ def run(rdf_module, args) -> None: for process_name in process_list: process_events[process_name] = 0 events_ttree[process_name] = 0 + if do_weighted: + sow_process[process_name] = 0. + sow_ttree[process_name] = 0. + file_list[process_name] = ROOT.vector('string')() infilepath = input_dir + process_name + '.root' # input file + if not os.path.isfile(infilepath): LOGGER.debug('File %s does not exist!\nTrying if it is a ' 'directory as it might have been processed in batch.', infilepath) else: LOGGER.info('Open file:\n %s', infilepath) - process_events[process_name], events_ttree[process_name] = \ - get_entries(infilepath) + if do_weighted: + process_events[process_name], events_ttree[process_name], \ + sow_process[process_name], sow_ttree[process_name] = \ + get_entries_sow(infilepath, weight_name="weight") + else: + process_events[process_name], events_ttree[process_name] = \ + get_entries(infilepath) file_list[process_name].push_back(infilepath) indirpath = input_dir + process_name if os.path.isdir(indirpath): + #reset the nevts/sow counters to avoid wrong counting in case a single file of same name (e.g. local test output) also exists in the same directory + process_events[process_name] = 0 + events_ttree[process_name] = 0 + sow_process[process_name] = 0. + sow_ttree[process_name] = 0. + info_msg = f'Open directory {indirpath}' flist = glob.glob(indirpath + '/chunk*.root') for filepath in flist: info_msg += '\n\t' + filepath - chunk_process_events, chunk_events_ttree = \ - get_entries(filepath) + if do_weighted: + chunk_process_events, chunk_events_ttree, \ + chunk_sow_process, chunk_sow_ttree = \ + get_entries_sow(filepath, weight_name="weight") + sow_process[process_name] += chunk_sow_process + sow_ttree[process_name] += chunk_sow_ttree + else: + chunk_process_events, chunk_events_ttree = \ + get_entries(filepath) process_events[process_name] += chunk_process_events events_ttree[process_name] += chunk_events_ttree file_list[process_name].push_back(filepath) @@ -300,6 +331,17 @@ def run(rdf_module, args) -> None: info_msg += f'\n\t- {process_name}: {n_events:,}' LOGGER.info(info_msg) + if do_weighted: + info_msg = 'Processed sum of weights:' + for process_name, sow in sow_process.items(): + info_msg += f'\n\t- {process_name}: {sow:,}' + LOGGER.info(info_msg) + info_msg = 'Sum of weights in the TTree:' + for process_name, sow in sow_ttree.items(): + info_msg += f'\n\t- {process_name}: {sow:,}' + LOGGER.info(info_msg) + + # Check if there are any histograms defined histo_list: dict[str, dict[str, any]] = get_attribute(rdf_module, "histoList", {}) @@ -446,6 +488,18 @@ def run(rdf_module, args) -> None: # Now perform the loop and evaluate everything at once. LOGGER.info('Evaluating...') all_events_raw = dframe.Count().GetValue() + all_events_weighted = all_events_raw + + if do_weighted: + # check that the weight column exists, it should always be called "weight" for now + try: + all_events_weighted = dframe.Sum("weight").GetValue() + LOGGER.info(f'Successfully applied event weights, got weighted events = {all_events_weighted:0,.2f}') + except cppyy.gbl.std.runtime_error: + LOGGER.error('Error: Event weights requested with do_weighted, ' + 'but input file does not contain weight column. Aborting.') + sys.exit(3) + LOGGER.info('Done') nevents_real += all_events_raw @@ -453,10 +507,16 @@ def run(rdf_module, args) -> None: if do_scale: LOGGER.info('Scaling cut yields...') - all_events = all_events_raw * 1. * gen_sf * \ - int_lumi / process_events[process_name] - uncertainty = ROOT.Math.sqrt(all_events_raw) * gen_sf * \ - int_lumi / process_events[process_name] + if do_weighted: + all_events = all_events_weighted * 1. * gen_sf * \ + int_lumi / sow_process[process_name] + uncertainty = ROOT.Math.sqrt(all_events_weighted) * gen_sf * \ + int_lumi / sow_process[process_name] + else: + all_events = all_events_raw * 1. * gen_sf * \ + int_lumi / process_events[process_name] + uncertainty = ROOT.Math.sqrt(all_events_raw) * gen_sf * \ + int_lumi / process_events[process_name] else: all_events = all_events_raw uncertainty = ROOT.Math.sqrt(all_events_raw) @@ -518,8 +578,12 @@ def run(rdf_module, args) -> None: hist_name = hist.GetName() + '_raw' outfile.WriteObject(hist.GetValue(), hist_name) if do_scale: - hist.Scale(gen_sf * int_lumi / - process_events[process_name]) + if do_weighted: + hist.Scale(gen_sf * int_lumi / + sow_process[process_name]) + else: + hist.Scale(gen_sf * int_lumi / + process_events[process_name]) outfile.WriteObject(hist.GetValue()) # write all metadata info to the output file @@ -527,9 +591,15 @@ def run(rdf_module, args) -> None: process_events[process_name]) outfile.WriteObject(param) - param = ROOT.TParameter(float)("sumOfWeights", - process_events[process_name]) - outfile.WriteObject(param) + if do_weighted: + param = ROOT.TParameter(float)("sumOfWeights", + sow_process[process_name]) + outfile.WriteObject(param) + + else: + param = ROOT.TParameter(float)("sumOfWeights", + process_events[process_name]) + outfile.WriteObject(param) param = ROOT.TParameter(bool)("scaled", do_scale) @@ -567,6 +637,22 @@ def run(rdf_module, args) -> None: 'events!', cut, process_name) sys.exit(3) + #store also the TParameters for total number of events and sum of weights to the trees + print("Updating file", fout_list[i]) + outfile = ROOT.TFile(fout_list[i], 'update') + param = ROOT.TParameter(int)('eventsProcessed', process_events[process_name]) + print("Number of events processed:", process_events[process_name]) + param.Write() + if do_weighted: + param2 = ROOT.TParameter(float)('SumOfWeights', sow_process[process_name]) + print("Sum of weights:", sow_process[process_name]) + param2.Write() + outfile.Write() + outfile.Close() + + + + # Save results either to JSON or LaTeX tables save_results(results, rdf_module) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9f93d9f0fb..ef5450c194 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -11,6 +11,8 @@ add_integration_test("examples/FCCee/vertex_lcfiplus/analysis_V0.py") add_standalone_test("examples/FCCee/fullSim/caloNtupleizer/analysis.py") +add_standalone_test("examples/FCChh/ggHH_bbyy/analysis_stage1.py") + if(WITH_PODIO_DATASOURCE) add_standalone_test("examples/data_source/standalone.py") add_integration_test("examples/data_source/histmaker_source.py")