From 0609fd94a6024ae215e6ba7714dbc1317d27158f Mon Sep 17 00:00:00 2001 From: Birgit Stapf Date: Wed, 16 Oct 2024 13:34:30 +0200 Subject: [PATCH] clean up FCChh bbyy example, add example for checking btag efficiencies from delphes --- examples/FCChh/ggHH_bbyy/analysis_final.py | 19 +- .../FCChh/ggHH_bbyy/analysis_plot_tag_eff.py | 156 ++++++++++++++ examples/FCChh/ggHH_bbyy/analysis_plots.py | 12 +- examples/FCChh/ggHH_bbyy/analysis_stage1.py | 65 ++---- examples/FCChh/ggHH_bbyy/plot_tag_eff.py | 199 ++++++++++++++++++ 5 files changed, 390 insertions(+), 61 deletions(-) create mode 100644 examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py create mode 100644 examples/FCChh/ggHH_bbyy/plot_tag_eff.py diff --git a/examples/FCChh/ggHH_bbyy/analysis_final.py b/examples/FCChh/ggHH_bbyy/analysis_final.py index 21d6266b74..41faab2498 100644 --- a/examples/FCChh/ggHH_bbyy/analysis_final.py +++ b/examples/FCChh/ggHH_bbyy/analysis_final.py @@ -5,17 +5,15 @@ outputDir = "outputs/FCChh/ggHH_bbyy/final/" processList = { - # 'pwp8_pp_hh_5f_hhbbyy':{},#Run over the full statistics from stage2 input file /p8_ee_ZZ_ecm240.root. Keep the same output name as input - 'pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags':{},#Run over the full statistics from stage2 input file /p8_ee_ZZ_ecm240.root. Keep the same output name as input + 'pwp8_pp_hh_5f_hhbbyy':{}, #output file from analysis_stage1.py } #Link to the dictonary that contains all the cross section informations etc... -procDict = "FCCee_procDict_spring2021_IDEA.json" # will need an FCC-hh one! +procDict = "/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/FCChh_procDict_tutorial.json" +#Note the numbeOfEvents and sumOfWeights are placeholders that get overwritten with the correct values in the samples -#Add MySample_p8_ee_ZH_ecm240 as it is not an offical process TO UPDATE -# procDictAdd={"pwp8_pp_hh_5f_hhbbyy":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}} -# procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}} -procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}} +#How to add a process that is not in the official dictionary: +# procDictAdd={"pwp8_pp_hh_5f_hhbbyy": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}} # Expected integrated luminosity intLumi = 30e+06 # pb-1 @@ -34,17 +32,18 @@ # Optional: Use weighted events do_weighted = True -###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file +# Dictionary of the list of cuts. The key is the name of the selection that will be added to the output file cutList = { "sel0_myy":"m_yy[0] > 100. && m_yy[0] < 180.", "sel1_mbb":"(m_yy[0] > 100. && m_yy[0] < 180.) && (m_bb[0] > 80. && m_bb[0] < 200.)", } - -#Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. +# Dictionary for the output variable/histograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. histoList = { "myy":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":0,"xmax":200}, "myy_zoom":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":100,"xmax":180}, + "mbb":{"name":"m_bb","title":"m_{bb} [GeV]","bin":50,"xmin":0,"xmax":250}, + "mbb_zoom":{"name":"m_bb","title":"m_{b} [GeV]","bin":50,"xmin":80,"xmax":200}, "y1_pT":{"name":"g1_pt","title":"pT_{#gamma1} [GeV]","bin":50,"xmin":0.,"xmax":200.}, "y2_pT":{"name":"g2_pt","title":"pT_{#gamma2} [GeV]","bin":50,"xmin":0.,"xmax":200.}, "pT_y1_vs_y2_2D":{"cols":["g1_pt", "g2_pt"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram diff --git a/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py new file mode 100644 index 0000000000..ac426b91df --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py @@ -0,0 +1,156 @@ +''' +Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events to check the Delphes b-tagging efficiencies +''' +from argparse import ArgumentParser + +# Mandatory: Analysis class where the user defines the operations on the +# dataframe. +class Analysis(): + ''' + Validation of Delphes b-tagging efficiencies in HH->bbyy events. + ''' + def __init__(self, cmdline_args): + parser = ArgumentParser( + description='Additional analysis arguments', + usage='Provide additional arguments after analysis script path') + # parser.add_argument('--bjet-pt', default='10.', type=float, + # help='Minimal pT of the selected b-jets.') + # Parse additional arguments not known to the FCCAnalyses parsers + # All command line arguments know to fccanalysis are provided in the + # `cmdline_arg` dictionary. + self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) + + # Mandatory: List of processes to run over + self.process_list = { + # # Add your processes like this: + ## '':{'fraction':, 'chunks':, 'output': }, + # # - needs to correspond either the name of the input .root file, or the name of a directory containing root files + # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments + # # or leave blank to use defaults = run the full statistics in one output file named the same as the process: + 'pwp8_pp_hh_5f_hhbbyy': {}, + } + + # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced + # samples (this points to the yaml files for getting sample statistics) + self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' + + # Optional: output directory, default is local running directory + self.output_dir = 'outputs/FCChh/ggHH_bbyy/nosel/' + + # Optional: analysisName, default is '' + # self.analysis_name = 'My Analysis' + + # Optional: number of threads to run on, default is 'all available' + # self.n_threads = 4 + + # Optional: running on HTCondor, default is False + # self.run_batch = False + + # Optional: Use weighted events + self.do_weighted = True + + # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) + self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ + 'tutorials/edm4hep_tutorial_data/' \ + 'pwp8_pp_hh_5f_hhbbyy.root' + + + # Mandatory: analyzers function to define the analysis graph, please make + # sure you return the dataframe, in this example it is dframe2 + def analyzers(self, dframe): + ''' + Analysis graph. + ''' + + dframe2 = ( + dframe + + .Define("weight", "EventHeader.weight") + + ########################################### JETS ########################################### + + #LOOSE WP + .Define("b_tagged_jets_loose", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 0)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_loose)") + .Define("px_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_loose)") + .Define("py_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_loose)") + .Define("pz_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_loose)") + .Define("E_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_loose)") + .Define("pT_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_loose)") + .Define("eta_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_loose)") + + #MEDIUM WP + .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_medium)") + .Define("px_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_medium)") + .Define("py_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_medium)") + .Define("pz_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_medium)") + .Define("E_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_medium)") + .Define("pT_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_medium)") + .Define("eta_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_medium)") + + #TIGHT WP + .Define("b_tagged_jets_tight", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 2)") #bit 2 = tight WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + .Define("n_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_tight)") + .Define("px_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_tight)") + .Define("py_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_tight)") + .Define("pz_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_tight)") + .Define("E_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_tight)") + .Define("pT_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_tight)") + .Define("eta_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_tight)") + + ########################################### MC PARTICLES ########################################### + + #all MC particles + .Define("mc_particles", "Particle") + .Alias("mc_parents", "_Particle_parents.index") + .Alias("mc_daughters", "_Particle_daughters.index") + + + ################################# Gen matched b-jets for b-tag eff study ########################################### + .Define("MC_b", "AnalysisFCChh::getBhadron(mc_particles,mc_parents)") + .Define("jets_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, Jet, 0.4)") + + .Define("bjets_loose_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_loose, 0.4)") + .Define("bjets_medium_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_medium, 0.4)") + .Define("bjets_tight_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_tight, 0.4)") + + #all genmatched + .Define("n_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(jets_genmatched_b)") + .Define("pT_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(jets_genmatched_b)") + .Define("eta_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(jets_genmatched_b)") + + #loose btag + .Define("n_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_loose_genmatched_b)") + .Define("pT_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_loose_genmatched_b)") + .Define("eta_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_loose_genmatched_b)") + + #medium btag + .Define("n_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_medium_genmatched_b)") + .Define("pT_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_medium_genmatched_b)") + .Define("eta_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_medium_genmatched_b)") + + #tight btag + .Define("n_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_tight_genmatched_b)") + .Define("pT_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_tight_genmatched_b)") + .Define("eta_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_tight_genmatched_b)") + + ) + return dframe2 + + # Mandatory: output function, please make sure you return the branch list + # as a python list + def output(self): + ''' + Output variables which will be saved to output root file. + ''' + branch_list = [ + 'weight', + #gen-matched b-jets to check efficiencies + 'n_b_jets_loose', 'n_b_jets_medium', 'n_b_jets_tight', + 'n_jets_genmatched_b', 'pT_jets_genmatched_b', 'eta_jets_genmatched_b', + 'n_bjets_loose_genmatched_b', 'pT_bjets_loose_genmatched_b', 'eta_bjets_loose_genmatched_b', + 'n_bjets_medium_genmatched_b', 'pT_bjets_medium_genmatched_b', 'eta_bjets_medium_genmatched_b', + 'n_bjets_tight_genmatched_b', 'pT_bjets_tight_genmatched_b', 'eta_bjets_tight_genmatched_b', + ] + return branch_list diff --git a/examples/FCChh/ggHH_bbyy/analysis_plots.py b/examples/FCChh/ggHH_bbyy/analysis_plots.py index 4501cbf98d..137d863987 100644 --- a/examples/FCChh/ggHH_bbyy/analysis_plots.py +++ b/examples/FCChh/ggHH_bbyy/analysis_plots.py @@ -14,8 +14,8 @@ outdir = 'outputs/FCChh/ggHH_bbyy/plots/' plotStatUnc = True -variables = ['myy','myy_zoom','y1_pT','y2_pT'] -# variables = ['myy','myy_zoom','y1_pT','y2_pT','pT_y1_vs_y2_2D'] +variables = ['myy','myy_zoom', 'mbb', 'mbb_zoom', 'y1_pT','y2_pT'] + # rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional) ### Dictionary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection @@ -28,14 +28,10 @@ colors = {} colors['bbyy_signal'] = ROOT.kRed -# colors['WW'] = ROOT.kBlue+1 -# colors['ZZ'] = ROOT.kGreen+2 -# colors['VV'] = ROOT.kGreen+3 plots = {} -plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags']}, +plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy']}, } - legend = {} -legend['bbyy_signal'] = 'HH signal' \ No newline at end of file +legend['bbyy_signal'] = 'HH' \ No newline at end of file diff --git a/examples/FCChh/ggHH_bbyy/analysis_stage1.py b/examples/FCChh/ggHH_bbyy/analysis_stage1.py index 9fd21679cf..b9ddd7df82 100644 --- a/examples/FCChh/ggHH_bbyy/analysis_stage1.py +++ b/examples/FCChh/ggHH_bbyy/analysis_stage1.py @@ -28,21 +28,18 @@ def __init__(self, cmdline_args): # # - needs to correspond either the name of the input .root file, or the name of a directory containing root files # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments # # or leave blank to use defaults = run the full statistics in one output file named the same as the process: - 'pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags': {}, - # 'pwp8_pp_hh_5f_hhbbyy': {}, + 'pwp8_pp_hh_5f_hhbbyy': {}, } # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced # samples (this points to the yaml files for getting sample statistics) - self.input_dir = '/eos/experiment/fcc/hh/tutorials/lhe_unpacked_tester/' - # self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' - # self.prod_tag = 'FCCee/spring2021/IDEA/' + self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' # Optional: output directory, default is local running directory self.output_dir = 'outputs/FCChh/ggHH_bbyy/presel/' # Optional: analysisName, default is '' - # self.analysis_name = 'My Analysis' + self.analysis_name = 'FCC-hh bbyy analysis' # Optional: number of threads to run on, default is 'all available' # self.n_threads = 4 @@ -59,7 +56,7 @@ def __init__(self, cmdline_args): # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ 'tutorials/edm4hep_tutorial_data/' \ - 'p8_ee_ZH_ecm240.root' + 'pwp8_pp_hh_5f_hhbbyy.root' # Mandatory: analyzers function to define the analysis graph, please make @@ -72,6 +69,9 @@ def analyzers(self, dframe): dframe2 = ( dframe + ########################################### DEFINITION OF VARIABLES ########################################### + + # generator event weight .Define("weight", "EventHeader.weight") ########################################### PHOTONS ########################################### @@ -81,7 +81,7 @@ def analyzers(self, dframe): .Define("sel_gamma_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_gamma)") .Define("sel_gamma", "AnalysisFCChh::SortParticleCollection(sel_gamma_unsort)") #sort by pT - .Define("ngamma", "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") + .Define("n_gamma", "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") .Define("g1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[0]") .Define("g1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[0]") .Define("g1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[0]") @@ -91,36 +91,20 @@ def analyzers(self, dframe): .Define("g2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[1]") .Define("g2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[1]") - #H(yy) if it exists, if there are no 2 selected photons, doesnt get filled + # H(yy) if it exists, if there are no 2 selected photons, doesnt get filled .Define("yy_pairs_unmerged", "AnalysisFCChh::getPairs(sel_gamma)") # retrieves the leading pT pair of all possible .Define("yy_pairs", "AnalysisFCChh::merge_pairs(yy_pairs_unmerged)") # merge pair into one object to access inv masses etc .Define("m_yy", "FCCAnalyses::ReconstructedParticle::get_mass(yy_pairs)") - ########################################### JETS ########################################### - # selected jets above a pT threshold of 30 GeV, eta < 4 - .Define("selpt_jets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(Jet)") - .Define("sel_jets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_jets)") - .Define("sel_jets", "AnalysisFCChh::SortParticleCollection(sel_jets_unsort)") - .Define("njets", "FCCAnalyses::ReconstructedParticle::get_n(sel_jets)") - .Define("j1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[0]") - .Define("j1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[0]") - .Define("j1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[0]") - .Define("j1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[0]") - - .Define("j2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[1]") - .Define("j2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[1]") - .Define("j2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[1]") - .Define("j2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[1]") - - #b-tagged jets: - #b tagged jets - .Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl - .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(bjets)") + # b-tagged jets at medium working point + .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl + # select medium b-jets with pT > 30 GeV, |eta| < 4 + .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(b_tagged_jets_medium)") .Define("sel_bjets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_bjets)") - .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)") - .Define("nbjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)") + .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)") #sort by pT + .Define("n_bjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)") .Define("b1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[0]") .Define("b1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[0]") .Define("b1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[0]") @@ -130,20 +114,15 @@ def analyzers(self, dframe): .Define("b2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[1]") .Define("b2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[1]") - #H(bb) system - using the medium WP jets - if there are no 2 b-tagged jets these variable don't get filled + # H(bb) system - using the medium WP jets - if there are no 2 b-tagged jets these variable don't get filled .Define("bb_pairs_unmerged", "AnalysisFCChh::getPairs(sel_bjets)") # retrieves the leading pT pair of all possible .Define("bb_pairs", "AnalysisFCChh::merge_pairs(bb_pairs_unmerged)") # merge pair into one object to access inv masses etc .Define("m_bb", "FCCAnalyses::ReconstructedParticle::get_mass(bb_pairs)") ########################################### APPLY PRE-SELECTION ########################################### - - #require at least two b-jets and two photons, both with invariant masses compatible with the Higgs mass - .Filter("sel_bjets.size()>1") - .Filter("sel_gamma.size()>1") - .Filter("m_bb[0] < 200.") - .Filter("m_bb[0] > 80.") - .Filter("m_yy[0] < 180.") - .Filter("m_yy[0] > 100.") + # require at least two b-jets and two photons + .Filter("n_gamma > 1") + .Filter("n_bjets > 1") ) return dframe2 @@ -157,8 +136,8 @@ def output(self): branch_list = [ 'weight', # Photons and H(yy) system: - 'ngamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy', + 'n_gamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy', # b-jets and H(bb) system: - 'nbjets', 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', 'm_bb', + 'n_bjets', 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', 'm_bb', ] - return branch_list + return branch_list \ No newline at end of file diff --git a/examples/FCChh/ggHH_bbyy/plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py new file mode 100644 index 0000000000..809d854661 --- /dev/null +++ b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py @@ -0,0 +1,199 @@ +from collections import namedtuple +import ROOT +import argparse +import os +import numpy as np + +ROOT.gROOT.SetBatch() +ROOT.gStyle.SetOptTitle(0) + +def getHfromRDF(hist): + h = None + t = hist.GetValue() + h = t.Clone() + return h + +def makePlots(dict_of_vars, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False): + + if not os.path.exists(out_dir_base): + os.mkdir(out_dir_base) + + rdf = ROOT.RDataFrame("events", input_filepath) + + if sel_cutstring: + rdf = rdf.Filter(sel_cutstring) #apply the selection + + if not rdf: + print("Empty file for:", input_filepath, " Exiting.") + return + + for plot_name, plot in dict_of_vars.items(): + print("Plotting:", plot_name) + + has_variable_binning = False + if not isinstance(plot.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(plot.nbins)-1 + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax) + + tmp_hist = rdf.Histo1D(model, plot.name) + if not tmp_hist.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + + tmp_hist.GetXaxis().SetTitle(plot.label) + tmp_hist.GetYaxis().SetTitle("Raw MC events") + tmp_hist.SetFillColor(ROOT.kCyan+2) + tmp_hist.SetLineColor(ROOT.kCyan+2) + + #write out + if sel_cutstring == "": + filename = "bbtautau_noSel_"+plot_name+out_format + else: + filename = "bbtautau_withSel_"+plot_name+out_format + fileout = os.path.join(out_dir_base, filename) + + canvas = ROOT.TCanvas("canvas", "canvas", 800, 800) + tmp_hist.Draw("hist same") + canvas.SaveAs(fileout) + +def makeEffPlots(dict_of_vars, ref_var, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False): + + if not os.path.exists(out_dir_base): + os.mkdir(out_dir_base) + + rdf = ROOT.RDataFrame("events", input_filepath) + + if sel_cutstring: + rdf = rdf.Filter(sel_cutstring) #apply the selection + + if not rdf: + print("Empty file for:", input_filepath, " Exiting.") + return + + #denominator hist + print("Plotting at denominator:", ref_var.name) + has_variable_binning = False + + if not isinstance(ref_var.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(ref_var.nbins)-1 + model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, ref_var.nbins, ref_var.xmin, ref_var.xmax) + + tmp_hist_den = rdf.Histo1D(model, ref_var.name) + + if not tmp_hist_den.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + else: + print("Number entries denumerator: ", tmp_hist_den.GetEntries()) + tmp_den = getHfromRDF(tmp_hist_den) + #tmp_hist.GetXaxis().SetTitle(plot.label) + #tmp_hist.GetYaxis().SetTitle("Raw MC events") + #tmp_hist.SetFillColor(ROOT.kCyan+2) + #tmp_hist.SetLineColor(ROOT.kCyan+2) + + for plot_name, plot in dict_of_vars.items(): + print("Plotting:", plot_name) + + has_variable_binning = False + if not isinstance(plot.nbins, int): + has_variable_binning = True + hist_binEdges = np.array(ref_var.nbins, dtype=float) + hist_nBins = len(plot.nbins)-1 + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges) + else: + model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax) + + tmp_hist_num = rdf.Histo1D(model, plot.name) + + if not tmp_hist_num.GetEntries(): + print("Empty histogram for:", input_filepath, " Exiting.") + return + else: + print("Number entries numerator: ", tmp_hist_num.GetEntries()) + + tmp_num = getHfromRDF(tmp_hist_num) + + if ROOT.TEfficiency.CheckConsistency(tmp_num, tmp_den): + Eff = ROOT.TEfficiency(tmp_num, tmp_den) + Eff.SetTitle(";"+plot.label+";Efficiency") + Eff.SetTitle(plot_name) + ptext = ROOT.TPaveText(.7,.85,.85,.9,option='NDC') + if "loose" in plot_name: + ptext.AddText("loose tag") + elif "medium" in plot_name: + ptext.AddText("medium tag") + elif "tight" in plot_name: + ptext.AddText("tight tag") + else: + ptext.AddText(plot_name) + #write out + if sel_cutstring == "": + filename = "bbtautau_noSel_Eff"+plot_name+out_format + else: + filename = "bbtautau_withSel_Eff"+plot_name+out_format + fileout = os.path.join(out_dir_base, filename) + + canvas = ROOT.TCanvas("canvas", "canvas", 900, 700) + canvas.SetGrid() + Eff.Draw("AP") + canvas.Update() + graph = Eff.GetPaintedGraph() + graph.SetMinimum(0.6) + graph.SetMaximum(1) + canvas.Update() + canvas.SetLogx() + ptext.Draw() + canvas.SaveAs(fileout) + else: + print("The histograms are not consistent!") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Plot b-tagging efficiencies from Delphes") + parser.add_argument('--input', '-i', metavar="INPUTDIR", dest="inFile", required=True, help="Path to the input file.") + parser.add_argument('--outdir', '-o', metavar="OUTPUTDIR", dest="outDir", required=True, help="Output directory.") + args = parser.parse_args() + + #use a custom namedntuple to transfer the plotting info + PlotSpecs = namedtuple('PlotSpecs', ['name', 'xmin', 'xmax', 'label', 'nbins']) + + #plot some variables without any selection: + bbyy_plots_nosel = { + "n_b_jets_loose":PlotSpecs(name="n_b_jets_loose", xmin=0, xmax=5, label="n_b_jets_loose", nbins=5), + "n_b_jets_medium":PlotSpecs(name="n_b_jets_medium", xmin=0, xmax=5, label="n_b_jets_medium", nbins=5), + "n_b_jets_tight":PlotSpecs(name="n_b_jets_tight", xmin=0, xmax=5, label="n_b_jets_tight", nbins=5), + "n_jets_genmatched_b": PlotSpecs(name="n_jets_genmatched_b", xmin=0, xmax=5, label="n_jets_genmatched_b", nbins=5), + "n_bjets_loose_genmatched_b": PlotSpecs(name="n_bjets_loose_genmatched_b", xmin=0, xmax=5, label="n_bjets_loose_genmatched_b", nbins=5), + "n_bjets_medium_genmatched_b": PlotSpecs(name="n_bjets_medium_genmatched_b", xmin=0, xmax=5, label="n_bjets_medium_genmatched_b", nbins=5), + "n_bjets_tight_genmatched_b": PlotSpecs(name="n_bjets_tight_genmatched_b", xmin=0, xmax=5, label="n_bjets_tight_genmatched_b", nbins=5), + } + + sel_cuts_empty = "" + makePlots(bbyy_plots_nosel, sel_cuts_empty, args.inFile, args.outDir) + + #bjet tagging efficiencies: + binedges=[20,50,100,200,500,1000,5000] #same pT binning as in the delphes card + bbyy_plot_pTeff={ + "pT_bjets_loose_genmatched_b":PlotSpecs(name="pT_bjets_loose_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + "pT_bjets_medium_genmatched_b":PlotSpecs(name="pT_bjets_medium_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + "pT_bjets_tight_genmatched_b":PlotSpecs(name="pT_bjets_tight_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges), + } + #select only one eta bin: + sel_cuts_eta_4 = "n_jets_genmatched_b == 2 && abs(eta_jets_genmatched_b[0]) < 4. && abs(eta_jets_genmatched_b[1]) < 4." + + pT_ref = PlotSpecs(name="pT_jets_genmatched_b", xmin=0., xmax=1000., label="p_{T} #tau_{h} [GeV]", nbins=binedges) + eta_ref = PlotSpecs(name="eta_jets_genmatched_b", xmin=-5, xmax=5., label="#eta #tau_{h}", nbins=20) + + makeEffPlots(bbyy_plot_pTeff, pT_ref, sel_cuts_eta_4, args.inFile, args.outDir) + +# run it with: +# python plot_tag_eff.py -i -o