From 0609fd94a6024ae215e6ba7714dbc1317d27158f Mon Sep 17 00:00:00 2001
From: Birgit Stapf <birgit.sylvia.stapf@cern.ch>
Date: Wed, 16 Oct 2024 13:34:30 +0200
Subject: [PATCH] clean up FCChh bbyy example, add example for checking btag
 efficiencies from delphes

---
 examples/FCChh/ggHH_bbyy/analysis_final.py    |  19 +-
 .../FCChh/ggHH_bbyy/analysis_plot_tag_eff.py  | 156 ++++++++++++++
 examples/FCChh/ggHH_bbyy/analysis_plots.py    |  12 +-
 examples/FCChh/ggHH_bbyy/analysis_stage1.py   |  65 ++----
 examples/FCChh/ggHH_bbyy/plot_tag_eff.py      | 199 ++++++++++++++++++
 5 files changed, 390 insertions(+), 61 deletions(-)
 create mode 100644 examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py
 create mode 100644 examples/FCChh/ggHH_bbyy/plot_tag_eff.py
diff --git a/examples/FCChh/ggHH_bbyy/analysis_final.py b/examples/FCChh/ggHH_bbyy/analysis_final.py
index 21d6266b74..41faab2498 100644
--- a/examples/FCChh/ggHH_bbyy/analysis_final.py
+++ b/examples/FCChh/ggHH_bbyy/analysis_final.py
@@ -5,17 +5,15 @@
 outputDir  = "outputs/FCChh/ggHH_bbyy/final/"
 
 processList = {
-    # 'pwp8_pp_hh_5f_hhbbyy':{},#Run over the full statistics from stage2 input file <inputDir>/p8_ee_ZZ_ecm240.root. Keep the same output name as input
-    'pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags':{},#Run over the full statistics from stage2 input file <inputDir>/p8_ee_ZZ_ecm240.root. Keep the same output name as input
+    'pwp8_pp_hh_5f_hhbbyy':{}, #output file from analysis_stage1.py
 }
 
 #Link to the dictonary that contains all the cross section informations etc...
-procDict = "FCCee_procDict_spring2021_IDEA.json" # will need an FCC-hh one!
+procDict = "/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/FCChh_procDict_tutorial.json"
+#Note the numbeOfEvents and sumOfWeights are placeholders that get overwritten with the correct values in the samples
 
-#Add MySample_p8_ee_ZH_ecm240 as it is not an offical process TO UPDATE 
-# procDictAdd={"pwp8_pp_hh_5f_hhbbyy":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}}
-# procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}}
-procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}}
+#How to add a process that is not in the official dictionary:
+# procDictAdd={"pwp8_pp_hh_5f_hhbbyy": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}}
 
 # Expected integrated luminosity
 intLumi = 30e+06  # pb-1
@@ -34,17 +32,18 @@
 # Optional: Use weighted events
 do_weighted = True 
 
-###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file
+# Dictionary of the list of cuts. The key is the name of the selection that will be added to the output file
 cutList = {
             "sel0_myy":"m_yy[0] > 100. && m_yy[0] < 180.",
             "sel1_mbb":"(m_yy[0] > 100. && m_yy[0] < 180.) && (m_bb[0] > 80. && m_bb[0] < 200.)",
             }
 
-
-#Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value.
+# Dictionary for the output variable/histograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value.
 histoList = {
     "myy":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":0,"xmax":200},
     "myy_zoom":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":100,"xmax":180},
+    "mbb":{"name":"m_bb","title":"m_{bb} [GeV]","bin":50,"xmin":0,"xmax":250},
+    "mbb_zoom":{"name":"m_bb","title":"m_{b} [GeV]","bin":50,"xmin":80,"xmax":200},
     "y1_pT":{"name":"g1_pt","title":"pT_{#gamma1} [GeV]","bin":50,"xmin":0.,"xmax":200.},
     "y2_pT":{"name":"g2_pt","title":"pT_{#gamma2} [GeV]","bin":50,"xmin":0.,"xmax":200.},
     "pT_y1_vs_y2_2D":{"cols":["g1_pt", "g2_pt"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram
diff --git a/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py
new file mode 100644
index 0000000000..ac426b91df
--- /dev/null
+++ b/examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py
@@ -0,0 +1,156 @@
+'''
+Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events to check the Delphes b-tagging efficiencies
+'''
+from argparse import ArgumentParser
+
+# Mandatory: Analysis class where the user defines the operations on the
+# dataframe.
+class Analysis():
+    '''
+    Validation of Delphes b-tagging efficiencies in HH->bbyy events.
+    '''
+    def __init__(self, cmdline_args):
+        parser = ArgumentParser(
+            description='Additional analysis arguments',
+            usage='Provide additional arguments after analysis script path')
+        # parser.add_argument('--bjet-pt', default='10.', type=float,
+        #                     help='Minimal pT of the selected b-jets.')
+        # Parse additional arguments not known to the FCCAnalyses parsers
+        # All command line arguments know to fccanalysis are provided in the
+        # `cmdline_arg` dictionary.
+        self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown'])
+
+        # Mandatory: List of processes to run over
+        self.process_list = {
+            # # Add your processes like this: 
+            ## '<name of process>':{'fraction':<fraction of events to run over>, 'chunks':<number of chunks to split the output into>, 'output':<name of the output file> }, 
+            # # - <name of process> needs to correspond either the name of the input .root file, or the name of a directory containing root files 
+            # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments
+            # # or leave blank to use defaults = run the full statistics in one output file named the same as the process:
+            'pwp8_pp_hh_5f_hhbbyy': {},
+        }
+
+        # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced
+        # samples (this points to the yaml files for getting sample statistics)
+        self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/'
+
+        # Optional: output directory, default is local running directory
+        self.output_dir = 'outputs/FCChh/ggHH_bbyy/nosel/'
+
+        # Optional: analysisName, default is ''
+        # self.analysis_name = 'My Analysis'
+
+        # Optional: number of threads to run on, default is 'all available'
+        # self.n_threads = 4
+
+        # Optional: running on HTCondor, default is False
+        # self.run_batch = False
+
+        # Optional: Use weighted events
+        self.do_weighted = True 
+
+        # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test)
+        self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \
+                         'tutorials/edm4hep_tutorial_data/' \
+                         'pwp8_pp_hh_5f_hhbbyy.root'
+
+
+    # Mandatory: analyzers function to define the analysis graph, please make
+    # sure you return the dataframe, in this example it is dframe2
+    def analyzers(self, dframe):
+        '''
+        Analysis graph.
+        '''
+
+        dframe2 = (
+            dframe
+
+            .Define("weight",  "EventHeader.weight")
+
+            ########################################### JETS ########################################### 
+
+            #LOOSE WP
+            .Define("b_tagged_jets_loose", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 0)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
+            .Define("n_b_jets_loose", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_loose)")
+            .Define("px_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_loose)")
+            .Define("py_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_loose)")
+            .Define("pz_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_loose)")
+            .Define("E_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_loose)")
+            .Define("pT_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_loose)")
+            .Define("eta_b_jets_loose",  "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_loose)")
+
+            #MEDIUM WP
+            .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
+            .Define("n_b_jets_medium", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_medium)")
+            .Define("px_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_medium)")
+            .Define("py_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_medium)")
+            .Define("pz_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_medium)")
+            .Define("E_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_medium)")
+            .Define("pT_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_medium)")
+            .Define("eta_b_jets_medium",  "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_medium)")
+
+            #TIGHT WP
+            .Define("b_tagged_jets_tight", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 2)") #bit 2 = tight WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
+            .Define("n_b_jets_tight", "FCCAnalyses::ReconstructedParticle::get_n(b_tagged_jets_tight)")
+            .Define("px_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_px(b_tagged_jets_tight)")
+            .Define("py_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_py(b_tagged_jets_tight)")
+            .Define("pz_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_pz(b_tagged_jets_tight)")
+            .Define("E_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_e(b_tagged_jets_tight)")
+            .Define("pT_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_pt(b_tagged_jets_tight)")
+            .Define("eta_b_jets_tight",  "FCCAnalyses::ReconstructedParticle::get_eta(b_tagged_jets_tight)")
+
+            ########################################### MC PARTICLES ########################################### 
+
+            #all MC particles
+            .Define("mc_particles", "Particle")
+            .Alias("mc_parents", "_Particle_parents.index")
+            .Alias("mc_daughters", "_Particle_daughters.index")
+
+
+            ################################# Gen matched b-jets for b-tag eff study ########################################### 
+            .Define("MC_b", "AnalysisFCChh::getBhadron(mc_particles,mc_parents)")
+            .Define("jets_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, Jet, 0.4)")
+
+            .Define("bjets_loose_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_loose, 0.4)")
+            .Define("bjets_medium_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_medium, 0.4)")
+            .Define("bjets_tight_genmatched_b", "AnalysisFCChh::find_reco_matches(MC_b, b_tagged_jets_tight, 0.4)")
+
+            #all genmatched
+            .Define("n_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(jets_genmatched_b)")
+            .Define("pT_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(jets_genmatched_b)")
+            .Define("eta_jets_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(jets_genmatched_b)")
+
+            #loose btag
+            .Define("n_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_loose_genmatched_b)")
+            .Define("pT_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_loose_genmatched_b)")
+            .Define("eta_bjets_loose_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_loose_genmatched_b)")
+
+            #medium btag
+            .Define("n_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_medium_genmatched_b)")
+            .Define("pT_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_medium_genmatched_b)")
+            .Define("eta_bjets_medium_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_medium_genmatched_b)")
+
+            #tight btag
+            .Define("n_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_n(bjets_tight_genmatched_b)")
+            .Define("pT_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_pt(bjets_tight_genmatched_b)")
+            .Define("eta_bjets_tight_genmatched_b", "FCCAnalyses::ReconstructedParticle::get_eta(bjets_tight_genmatched_b)")
+
+        )
+        return dframe2
+
+    # Mandatory: output function, please make sure you return the branch list
+    # as a python list
+    def output(self):
+        '''
+        Output variables which will be saved to output root file.
+        '''
+        branch_list = [
+            'weight',
+            #gen-matched b-jets to check efficiencies
+            'n_b_jets_loose', 'n_b_jets_medium', 'n_b_jets_tight',
+            'n_jets_genmatched_b', 'pT_jets_genmatched_b', 'eta_jets_genmatched_b',
+            'n_bjets_loose_genmatched_b', 'pT_bjets_loose_genmatched_b', 'eta_bjets_loose_genmatched_b',
+            'n_bjets_medium_genmatched_b', 'pT_bjets_medium_genmatched_b', 'eta_bjets_medium_genmatched_b',
+            'n_bjets_tight_genmatched_b', 'pT_bjets_tight_genmatched_b', 'eta_bjets_tight_genmatched_b',
+        ]
+        return branch_list
diff --git a/examples/FCChh/ggHH_bbyy/analysis_plots.py b/examples/FCChh/ggHH_bbyy/analysis_plots.py
index 4501cbf98d..137d863987 100644
--- a/examples/FCChh/ggHH_bbyy/analysis_plots.py
+++ b/examples/FCChh/ggHH_bbyy/analysis_plots.py
@@ -14,8 +14,8 @@
 outdir         = 'outputs/FCChh/ggHH_bbyy/plots/'
 plotStatUnc    = True
 
-variables = ['myy','myy_zoom','y1_pT','y2_pT']
-# variables = ['myy','myy_zoom','y1_pT','y2_pT','pT_y1_vs_y2_2D']
+variables = ['myy','myy_zoom', 'mbb', 'mbb_zoom', 'y1_pT','y2_pT']
+
 # rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional)
 
 ### Dictionary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection
@@ -28,14 +28,10 @@
 
 colors = {}
 colors['bbyy_signal'] = ROOT.kRed
-# colors['WW'] = ROOT.kBlue+1
-# colors['ZZ'] = ROOT.kGreen+2
-# colors['VV'] = ROOT.kGreen+3
 
 plots = {}
-plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags']},
+plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy']},
            }
 
-
 legend = {}
-legend['bbyy_signal'] = 'HH signal'
\ No newline at end of file
+legend['bbyy_signal'] = 'HH'
\ No newline at end of file
diff --git a/examples/FCChh/ggHH_bbyy/analysis_stage1.py b/examples/FCChh/ggHH_bbyy/analysis_stage1.py
index 9fd21679cf..b9ddd7df82 100644
--- a/examples/FCChh/ggHH_bbyy/analysis_stage1.py
+++ b/examples/FCChh/ggHH_bbyy/analysis_stage1.py
@@ -28,21 +28,18 @@ def __init__(self, cmdline_args):
             # # - <name of process> needs to correspond either the name of the input .root file, or the name of a directory containing root files 
             # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments
             # # or leave blank to use defaults = run the full statistics in one output file named the same as the process:
-            'pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags': {},
-            # 'pwp8_pp_hh_5f_hhbbyy': {},
+            'pwp8_pp_hh_5f_hhbbyy': {},
         }
 
         # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced
         # samples (this points to the yaml files for getting sample statistics)
-        self.input_dir = '/eos/experiment/fcc/hh/tutorials/lhe_unpacked_tester/'
-        # self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/'
-        # self.prod_tag = 'FCCee/spring2021/IDEA/'
+        self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/'
 
         # Optional: output directory, default is local running directory
         self.output_dir = 'outputs/FCChh/ggHH_bbyy/presel/'
 
         # Optional: analysisName, default is ''
-        # self.analysis_name = 'My Analysis'
+        self.analysis_name = 'FCC-hh bbyy analysis'
 
         # Optional: number of threads to run on, default is 'all available'
         # self.n_threads = 4
@@ -59,7 +56,7 @@ def __init__(self, cmdline_args):
         # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test)
         self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \
                          'tutorials/edm4hep_tutorial_data/' \
-                         'p8_ee_ZH_ecm240.root'
+                         'pwp8_pp_hh_5f_hhbbyy.root'
 
 
     # Mandatory: analyzers function to define the analysis graph, please make
@@ -72,6 +69,9 @@ def analyzers(self, dframe):
         dframe2 = (
             dframe
 
+            ########################################### DEFINITION OF VARIABLES ########################################### 
+
+            # generator event weight
             .Define("weight",  "EventHeader.weight")
 
             ########################################### PHOTONS ########################################### 
@@ -81,7 +81,7 @@ def analyzers(self, dframe):
             .Define("sel_gamma_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_gamma)")
             .Define("sel_gamma", "AnalysisFCChh::SortParticleCollection(sel_gamma_unsort)") #sort by pT
 
-            .Define("ngamma",  "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") 
+            .Define("n_gamma",  "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") 
             .Define("g1_e",  "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[0]")
             .Define("g1_pt",  "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[0]")
             .Define("g1_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[0]")
@@ -91,36 +91,20 @@ def analyzers(self, dframe):
             .Define("g2_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[1]")
             .Define("g2_phi",  "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[1]")
 
-            #H(yy) if it exists, if there are no 2 selected photons, doesnt get filled 
+            # H(yy) if it exists, if there are no 2 selected photons, doesnt get filled 
             .Define("yy_pairs_unmerged", "AnalysisFCChh::getPairs(sel_gamma)") # retrieves the leading pT pair of all possible 
             .Define("yy_pairs", "AnalysisFCChh::merge_pairs(yy_pairs_unmerged)") # merge pair into one object to access inv masses etc
             .Define("m_yy", "FCCAnalyses::ReconstructedParticle::get_mass(yy_pairs)")
 
-
             ########################################### JETS ########################################### 
 
-            # selected jets above a pT threshold of 30 GeV, eta < 4
-            .Define("selpt_jets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(Jet)")
-            .Define("sel_jets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_jets)")
-            .Define("sel_jets", "AnalysisFCChh::SortParticleCollection(sel_jets_unsort)") 
-            .Define("njets",  "FCCAnalyses::ReconstructedParticle::get_n(sel_jets)")
-            .Define("j1_e",  "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[0]")
-            .Define("j1_pt",  "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[0]")
-            .Define("j1_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[0]")
-            .Define("j1_phi",  "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[0]")
-
-            .Define("j2_e",  "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[1]")
-            .Define("j2_pt",  "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[1]")
-            .Define("j2_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[1]")
-            .Define("j2_phi",  "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[1]")
-
-            #b-tagged jets:
-            #b tagged jets
-            .Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
-            .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(bjets)")
+            # b-tagged jets at medium working point
+            .Define("b_tagged_jets_medium", "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, _Jet_HF_tags_particle, _Jet_HF_tags_parameters, 1)") #bit 1 = medium WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
+            # select medium b-jets with pT > 30 GeV, |eta| < 4
+            .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(b_tagged_jets_medium)")
             .Define("sel_bjets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_bjets)")
-            .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)")
-            .Define("nbjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)")
+            .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)") #sort by pT
+            .Define("n_bjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)")
             .Define("b1_e",  "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[0]")
             .Define("b1_pt",  "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[0]")
             .Define("b1_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[0]")
@@ -130,20 +114,15 @@ def analyzers(self, dframe):
             .Define("b2_eta",  "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[1]")
             .Define("b2_phi",  "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[1]")
 
-            #H(bb) system - using the medium WP jets - if there are no 2 b-tagged jets these variable don't get filled
+            # H(bb) system - using the medium WP jets - if there are no 2 b-tagged jets these variable don't get filled
             .Define("bb_pairs_unmerged", "AnalysisFCChh::getPairs(sel_bjets)") # retrieves the leading pT pair of all possible 
             .Define("bb_pairs", "AnalysisFCChh::merge_pairs(bb_pairs_unmerged)") # merge pair into one object to access inv masses etc
             .Define("m_bb", "FCCAnalyses::ReconstructedParticle::get_mass(bb_pairs)")
 
             ########################################### APPLY PRE-SELECTION ########################################### 
-
-            #require at least two b-jets and two photons, both with invariant masses compatible with the Higgs mass
-            .Filter("sel_bjets.size()>1")
-            .Filter("sel_gamma.size()>1") 
-            .Filter("m_bb[0] < 200.") 
-            .Filter("m_bb[0] > 80.") 
-            .Filter("m_yy[0] < 180.") 
-            .Filter("m_yy[0] > 100.")   
+            # require at least two b-jets and two photons
+            .Filter("n_gamma > 1")
+            .Filter("n_bjets > 1")
 
         )
         return dframe2
@@ -157,8 +136,8 @@ def output(self):
         branch_list = [
             'weight',
             # Photons and H(yy) system:
-            'ngamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy',
+            'n_gamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy',
             # b-jets and H(bb) system:
-            'nbjets', 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', 'm_bb',
+            'n_bjets', 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', 'm_bb',
         ]
-        return branch_list
+        return branch_list
\ No newline at end of file
diff --git a/examples/FCChh/ggHH_bbyy/plot_tag_eff.py b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py
new file mode 100644
index 0000000000..809d854661
--- /dev/null
+++ b/examples/FCChh/ggHH_bbyy/plot_tag_eff.py
@@ -0,0 +1,199 @@
+from collections import namedtuple
+import ROOT
+import argparse
+import os 
+import numpy as np
+
+ROOT.gROOT.SetBatch()
+ROOT.gStyle.SetOptTitle(0)
+
+def getHfromRDF(hist):
+    h = None
+    t = hist.GetValue()
+    h = t.Clone()
+    return h
+
+def makePlots(dict_of_vars, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False):
+
+   if not os.path.exists(out_dir_base):
+      os.mkdir(out_dir_base)
+
+   rdf = ROOT.RDataFrame("events", input_filepath)
+
+   if sel_cutstring:
+      rdf = rdf.Filter(sel_cutstring) #apply the selection
+
+   if not rdf:
+      print("Empty file for:", input_filepath, " Exiting.")
+      return
+
+   for plot_name, plot in dict_of_vars.items():
+      print("Plotting:", plot_name)
+
+      has_variable_binning = False
+      if not isinstance(plot.nbins, int):
+         has_variable_binning = True
+         hist_binEdges = np.array(ref_var.nbins, dtype=float)
+         hist_nBins = len(plot.nbins)-1
+         model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges)
+      else:
+         model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax)
+
+      tmp_hist = rdf.Histo1D(model, plot.name)
+      if not tmp_hist.GetEntries():
+         print("Empty histogram for:", input_filepath, " Exiting.")
+         return
+
+      tmp_hist.GetXaxis().SetTitle(plot.label)
+      tmp_hist.GetYaxis().SetTitle("Raw MC events")
+      tmp_hist.SetFillColor(ROOT.kCyan+2)
+      tmp_hist.SetLineColor(ROOT.kCyan+2)
+
+      #write out
+      if sel_cutstring == "":
+         filename = "bbtautau_noSel_"+plot_name+out_format
+      else:
+         filename = "bbtautau_withSel_"+plot_name+out_format
+      fileout = os.path.join(out_dir_base, filename)
+
+      canvas = ROOT.TCanvas("canvas", "canvas", 800, 800) 
+      tmp_hist.Draw("hist same")
+      canvas.SaveAs(fileout)
+
+def makeEffPlots(dict_of_vars, ref_var, sel_cutstring, input_filepath, out_dir_base, out_format = ".png", do_log_y=False):
+
+        if not os.path.exists(out_dir_base):
+                os.mkdir(out_dir_base)
+
+        rdf = ROOT.RDataFrame("events", input_filepath)
+
+        if sel_cutstring:
+                rdf = rdf.Filter(sel_cutstring) #apply the selection
+
+        if not rdf:
+                print("Empty file for:", input_filepath, " Exiting.")
+                return
+
+        #denominator hist
+        print("Plotting at denominator:", ref_var.name)
+        has_variable_binning = False
+        
+        if not isinstance(ref_var.nbins, int):
+           has_variable_binning = True
+           hist_binEdges = np.array(ref_var.nbins, dtype=float)
+           hist_nBins = len(ref_var.nbins)-1
+           model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, hist_nBins, hist_binEdges)
+        else:
+           model = ROOT.RDF.TH1DModel(ref_var.name+"_model_hist", ref_var.name, ref_var.nbins, ref_var.xmin, ref_var.xmax)
+        
+        tmp_hist_den = rdf.Histo1D(model, ref_var.name)
+       
+        if not tmp_hist_den.GetEntries():
+           print("Empty histogram for:", input_filepath, " Exiting.")
+           return
+        else:
+           print("Number entries denumerator: ", tmp_hist_den.GetEntries())
+        tmp_den = getHfromRDF(tmp_hist_den)
+        #tmp_hist.GetXaxis().SetTitle(plot.label)
+        #tmp_hist.GetYaxis().SetTitle("Raw MC events")
+        #tmp_hist.SetFillColor(ROOT.kCyan+2)
+        #tmp_hist.SetLineColor(ROOT.kCyan+2)
+
+        for plot_name, plot in dict_of_vars.items():
+                print("Plotting:", plot_name)
+
+                has_variable_binning = False
+                if not isinstance(plot.nbins, int):
+                        has_variable_binning = True
+                        hist_binEdges = np.array(ref_var.nbins, dtype=float)
+                        hist_nBins = len(plot.nbins)-1
+                        model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, hist_nBins, hist_binEdges)
+                else:
+                        model = ROOT.RDF.TH1DModel(plot_name+"_model_hist", plot_name, plot.nbins, plot.xmin, plot.xmax)
+
+                tmp_hist_num = rdf.Histo1D(model, plot.name)               
+ 
+                if not tmp_hist_num.GetEntries():
+                        print("Empty histogram for:", input_filepath, " Exiting.")
+                        return
+                else:
+                        print("Number entries numerator: ", tmp_hist_num.GetEntries())
+
+                tmp_num = getHfromRDF(tmp_hist_num)
+               
+                if ROOT.TEfficiency.CheckConsistency(tmp_num, tmp_den):
+                   Eff = ROOT.TEfficiency(tmp_num, tmp_den)
+                   Eff.SetTitle(";"+plot.label+";Efficiency")
+                   Eff.SetTitle(plot_name)
+                   ptext = ROOT.TPaveText(.7,.85,.85,.9,option='NDC')
+                   if "loose" in plot_name:
+                      ptext.AddText("loose tag")
+                   elif "medium" in plot_name:
+                      ptext.AddText("medium tag")  
+                   elif "tight" in plot_name:
+                      ptext.AddText("tight tag")   
+                   else:
+                      ptext.AddText(plot_name)                     
+         #write out
+                   if sel_cutstring == "":
+                      filename = "bbtautau_noSel_Eff"+plot_name+out_format
+                   else:
+                      filename = "bbtautau_withSel_Eff"+plot_name+out_format
+                   fileout = os.path.join(out_dir_base, filename)
+
+                   canvas = ROOT.TCanvas("canvas", "canvas", 900, 700) 
+                   canvas.SetGrid()
+                   Eff.Draw("AP")
+                   canvas.Update()
+                   graph = Eff.GetPaintedGraph()
+                   graph.SetMinimum(0.6)
+                   graph.SetMaximum(1) 
+                   canvas.Update() 
+                   canvas.SetLogx()
+                   ptext.Draw()
+                   canvas.SaveAs(fileout)
+                else:
+                   print("The histograms are not consistent!")
+
+
+if __name__ == "__main__":
+
+   parser = argparse.ArgumentParser(description="Plot b-tagging efficiencies from Delphes")
+   parser.add_argument('--input', '-i', metavar="INPUTDIR", dest="inFile", required=True, help="Path to the input file.")
+   parser.add_argument('--outdir', '-o', metavar="OUTPUTDIR", dest="outDir", required=True, help="Output directory.")
+   args = parser.parse_args()
+
+   #use a custom namedntuple to transfer the plotting info
+   PlotSpecs = namedtuple('PlotSpecs', ['name', 'xmin', 'xmax', 'label', 'nbins'])
+
+   #plot some variables without any selection:
+   bbyy_plots_nosel = {
+      "n_b_jets_loose":PlotSpecs(name="n_b_jets_loose", xmin=0, xmax=5, label="n_b_jets_loose", nbins=5),
+      "n_b_jets_medium":PlotSpecs(name="n_b_jets_medium", xmin=0, xmax=5, label="n_b_jets_medium", nbins=5),
+      "n_b_jets_tight":PlotSpecs(name="n_b_jets_tight", xmin=0, xmax=5, label="n_b_jets_tight", nbins=5),
+      "n_jets_genmatched_b": PlotSpecs(name="n_jets_genmatched_b", xmin=0, xmax=5, label="n_jets_genmatched_b", nbins=5),
+      "n_bjets_loose_genmatched_b": PlotSpecs(name="n_bjets_loose_genmatched_b", xmin=0, xmax=5, label="n_bjets_loose_genmatched_b", nbins=5),
+      "n_bjets_medium_genmatched_b": PlotSpecs(name="n_bjets_medium_genmatched_b", xmin=0, xmax=5, label="n_bjets_medium_genmatched_b", nbins=5),
+      "n_bjets_tight_genmatched_b": PlotSpecs(name="n_bjets_tight_genmatched_b", xmin=0, xmax=5, label="n_bjets_tight_genmatched_b", nbins=5),
+   }
+
+   sel_cuts_empty = ""
+   makePlots(bbyy_plots_nosel, sel_cuts_empty, args.inFile, args.outDir)
+
+   #bjet tagging efficiencies:
+   binedges=[20,50,100,200,500,1000,5000] #same pT binning as in the delphes card
+   bbyy_plot_pTeff={
+      "pT_bjets_loose_genmatched_b":PlotSpecs(name="pT_bjets_loose_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges),
+      "pT_bjets_medium_genmatched_b":PlotSpecs(name="pT_bjets_medium_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges),
+      "pT_bjets_tight_genmatched_b":PlotSpecs(name="pT_bjets_tight_genmatched_b", xmin=0., xmax=1000., label="p_{T} bjets [GeV]", nbins=binedges),
+   }
+   #select only one eta bin:
+   sel_cuts_eta_4 = "n_jets_genmatched_b == 2 && abs(eta_jets_genmatched_b[0]) < 4. && abs(eta_jets_genmatched_b[1]) < 4."
+
+   pT_ref = PlotSpecs(name="pT_jets_genmatched_b", xmin=0., xmax=1000., label="p_{T} #tau_{h} [GeV]", nbins=binedges)
+   eta_ref = PlotSpecs(name="eta_jets_genmatched_b", xmin=-5, xmax=5., label="#eta  #tau_{h}", nbins=20)
+
+   makeEffPlots(bbyy_plot_pTeff, pT_ref, sel_cuts_eta_4, args.inFile, args.outDir)
+
+# run it with:
+# python plot_tag_eff.py -i <file> -o <out_name>