From e4150fcedd87b3e8ea8f44653819caf9c23cfc83 Mon Sep 17 00:00:00 2001 From: Birgit Stapf Date: Fri, 27 Sep 2024 19:36:57 +0200 Subject: [PATCH] add first version of using event weights for scaling etc --- analyzers/dataframe/src/Analysis_FCChh.cc | 20 +- examples/FCChh/ggHH_bbyy/analysis_final.py | 3 + examples/FCChh/ggHH_bbyy/analysis_stage1.py | 8 +- .../ggHH_bbyy/analysis_stage1_fullfile.py | 173 ------------------ python/process.py | 63 +++++++ python/run_fccanalysis.py | 90 ++++++--- python/run_final_analysis.py | 106 +++++++++-- 7 files changed, 241 insertions(+), 222 deletions(-) delete mode 100644 examples/FCChh/ggHH_bbyy/analysis_stage1_fullfile.py diff --git a/analyzers/dataframe/src/Analysis_FCChh.cc b/analyzers/dataframe/src/Analysis_FCChh.cc index 430ca03bcb..0bf52537db 100644 --- a/analyzers/dataframe/src/Analysis_FCChh.cc +++ b/analyzers/dataframe/src/Analysis_FCChh.cc @@ -924,6 +924,7 @@ AnalysisFCChh::get_tagged_jets( for (size_t pid_index = 0; pid_index < pids.size(); ++pid_index) { const auto tag = + // static_cast(tag_values[pid_index]); static_cast(tag_values[pids[pid_index].parameters_begin]); // std::cout << "Tag = " << tag << std::endl; @@ -1145,7 +1146,8 @@ ROOT::VecOps::RVec AnalysisFCChh::getOSPairs( // FOR DEBUG? // if (OS_pairs.size() > 1){ // std::cout << "Number of possible OS pairs: " << OS_pairs.size() << - // std::endl; std::cout << "Build from: " << leptons_pos.size() << " pos, " + // std::endl; std::cout << "Build from: " << leptons_pos.size() << " + // pos, " // << leptons_neg.size() << " neg." << std::endl; // } @@ -1570,8 +1572,8 @@ ROOT::VecOps::RVec AnalysisFCChh::get_mT_pseudo( // //if one of the input particles is empty, just fill a default value of // -999 as mT if (particle_1.size() < 1 || particle_2.size() < 1|| -// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); return -// m_strans_vector; +// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); +// return m_strans_vector; // } // TLorentzVector tlv_vis1 = getTLV_reco(particle_1.at(0)); @@ -1618,8 +1620,8 @@ ROOT::VecOps::RVec AnalysisFCChh::get_mT_pseudo( // //if one of the input particles is empty, just fill a default value of // -999 as mT if (particle_1.size() < 1 || particle_2.size() < 1|| -// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); return -// m_strans_vector; +// MET_obj.size() < 1 ){ m_strans_vector.push_back(-999.); +// return m_strans_vector; // } // TLorentzVector tlv_vis1 = getTLV_reco(particle_1.at(0)); @@ -2496,7 +2498,7 @@ ROOT::VecOps::RVec AnalysisFCChh::getTruthTau( !isFromHiggsDirect( truth_part, parent_ids, truth_particles)) { //&& isFromHadron(truth_part, parent_ids, - //truth_particles) ) { + // truth_particles) ) { continue; } // select both from higgs or from hadrons @@ -2541,7 +2543,7 @@ ROOT::VecOps::RVec AnalysisFCChh::getTruthTauLeps( !isFromHiggsDirect( truth_part, parent_ids, truth_particles)) { //&& isFromHadron(truth_part, parent_ids, - //truth_particles) ) { + // truth_particles) ) { continue; } // select both from higgs or from hadrons @@ -2618,7 +2620,7 @@ ROOT::VecOps::RVec AnalysisFCChh::getTruthTauHads( !isFromHiggsDirect( truth_part, parent_ids, truth_particles)) { //&& isFromHadron(truth_part, parent_ids, - //truth_particles) ) { + // truth_particles) ) { continue; } // select both from higgs or from hadrons @@ -3336,7 +3338,7 @@ AnalysisFCChh::find_reco_matches_exclusive( } // if (reco_match_vector.size() > 1){ // std::cout << "Warning in AnalysisFCChh::find_reco_matches() - Truth - //particle matched to more than one reco 
particle." << std::endl; + // particle matched to more than one reco particle." << std::endl; // } // check that the reco particle is not already in the out_vector diff --git a/examples/FCChh/ggHH_bbyy/analysis_final.py b/examples/FCChh/ggHH_bbyy/analysis_final.py index fd62d5345c..44a9e8c360 100644 --- a/examples/FCChh/ggHH_bbyy/analysis_final.py +++ b/examples/FCChh/ggHH_bbyy/analysis_final.py @@ -22,6 +22,9 @@ saveTabular = True +# Optional: Use weighted events +do_weighted = True + ###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file cutList = { "sel0_myy":"m_yy[0] > 100. && m_yy[0] < 180.", diff --git a/examples/FCChh/ggHH_bbyy/analysis_stage1.py b/examples/FCChh/ggHH_bbyy/analysis_stage1.py index 23eabc1cfe..e203a67af9 100644 --- a/examples/FCChh/ggHH_bbyy/analysis_stage1.py +++ b/examples/FCChh/ggHH_bbyy/analysis_stage1.py @@ -48,6 +48,9 @@ def __init__(self, cmdline_args): # Optional: running on HTCondor, default is False # self.run_batch = False + # Optional: Use weighted events + self.do_weighted = True + # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ 'tutorials/edm4hep_tutorial_data/' \ @@ -130,8 +133,8 @@ def analyzers(self, dframe): ########################################### APPLY PRE-SELECTION ########################################### #require at least two b-jets and two photons, both with invariant masses compatible with the Higgs mass - .Filter("sel_bjets.size()>1") - .Filter("sel_gamma.size()>1") + # .Filter("sel_bjets.size()>1") + # .Filter("sel_gamma.size()>1") # .Filter("m_bb[0] < 200.") # .Filter("m_bb[0] > 80.") # .Filter("m_yy[0] < 180.") @@ -147,6 +150,7 @@ def output(self): Output variables which will be saved to output root file. ''' branch_list = [ + 'weight', # Photons and H(yy) system: 'ngamma', 'g1_pt', 'g2_pt', 'g1_eta', 'g2_eta', 'm_yy', # b-jets and H(bb) system: diff --git a/examples/FCChh/ggHH_bbyy/analysis_stage1_fullfile.py b/examples/FCChh/ggHH_bbyy/analysis_stage1_fullfile.py deleted file mode 100644 index 16d0810216..0000000000 --- a/examples/FCChh/ggHH_bbyy/analysis_stage1_fullfile.py +++ /dev/null @@ -1,173 +0,0 @@ -''' -Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events -''' -from argparse import ArgumentParser - - -# Mandatory: Analysis class where the user defines the operations on the -# dataframe. -class Analysis(): - ''' - Di-Higgs analysis in bbyy. - ''' - def __init__(self, cmdline_args): - parser = ArgumentParser( - description='Additional analysis arguments', - usage='Provide additional arguments after analysis script path') - # parser.add_argument('--bjet-pt', default='10.', type=float, - # help='Minimal pT of the selected b-jets.') - # Parse additional arguments not known to the FCCAnalyses parsers - # All command line arguments know to fccanalysis are provided in the - # `cmdline_arg` dictionary. 
- self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) - - # Mandatory: List of processes to run over - self.process_list = { - # # Add your processes like this: - ## '':{'fraction':, 'chunks':, 'output': }, - # # - needs to correspond either the name of the input .root file, or the name of a directory containing root files - # # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments - # # or leave blank to use defaults = run the full statistics in one output file named the same as the process: - 'pwp8_pp_hh_lambda100_5f_hhbbaa': {}, - } - - # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced - # samples (this points to the yaml files for getting sample statistics) - self.input_dir = '/eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v06_scenarioI/' - # self.prod_tag = 'FCCee/spring2021/IDEA/' - - # Optional: output directory, default is local running directory - self.output_dir = 'fcc_v06_bbyy/' \ - # f'stage1_{self.ana_args.muon_pt}' - - # Optional: analysisName, default is '' - # self.analysis_name = 'My Analysis' - - # Optional: number of threads to run on, default is 'all available' - # self.n_threads = 4 - - # Optional: running on HTCondor, default is False - # self.run_batch = False - - # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_stage1.py --test) - self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ - 'tutorials/edm4hep_tutorial_data/' \ - 'p8_ee_ZH_ecm240.root' - - - # Mandatory: analyzers function to define the analysis graph, please make - # sure you return the dataframe, in this example it is dframe2 - def analyzers(self, dframe): - ''' - Analysis graph. 
- ''' - - dframe2 = ( - dframe - - .Define("weight", "EventHeader.weight") - - ########################################### PHOTONS ########################################### - - .Define("gamma", "FCCAnalyses::ReconstructedParticle::get(Photon_objIdx.index, ReconstructedParticles)") - .Define("selpt_gamma", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(gamma)") - .Define("sel_gamma_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_gamma)") - .Define("sel_gamma", "AnalysisFCChh::SortParticleCollection(sel_gamma_unsort)") - - .Define("ngamma", "FCCAnalyses::ReconstructedParticle::get_n(sel_gamma)") - .Define("g1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[0]") - .Define("g1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[0]") - .Define("g1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[0]") - .Define("g1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[0]") - .Define("g2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_gamma)[1]") - .Define("g2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_gamma)[1]") - .Define("g2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_gamma)[1]") - .Define("g2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_gamma)[1]") - - #H(yy) if it exists, if there are no 2 selected photons, doesnt get filled - .Define("yy_pairs_unmerged", "AnalysisFCChh::getPairs(sel_gamma)") - .Define("yy_pairs", "AnalysisFCChh::merge_pairs(yy_pairs_unmerged)") - .Define("m_yy", "FCCAnalyses::ReconstructedParticle::get_mass(yy_pairs)") - - - ########################################### JETS ########################################### - - # jets after overlap removal is performed between jets and isolated electrons, muons and photons - - # selected jets above a pT threshold of 30 GeV, eta < 4, tight ID - .Define("selpt_jets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(Jet)") - .Define("sel_jets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_jets)") - .Define("sel_jets", "AnalysisFCChh::SortParticleCollection(sel_jets_unsort)") - .Define("njets", "FCCAnalyses::ReconstructedParticle::get_n(sel_jets)") - .Define("j1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[0]") - .Define("j1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[0]") - .Define("j1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[0]") - .Define("j1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[0]") - - .Define("j2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_jets)[1]") - .Define("j2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_jets)[1]") - .Define("j2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_jets)[1]") - .Define("j2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_jets)[1]") - - #b-tagged jets: - # .Alias("Jet3","Jet#3.index") - .Alias("Jet3","_Jet_particles.index") - #LOOSE WP : bit 1 = medium WP, bit 2 = tight WP - #b tagged jets - .Define("bjets", "AnalysisFCChh::get_tagged_jets(ReconstructedParticles, ParticleIDs, _ParticleIDs_particle, _ParticleIDs_parameters, 1)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl - # .Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet3, ParticleIDs, _ParticleIDs_parameters, 1)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl - # .Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(bjets)") - # .Define("sel_bjets_unsort", 
"FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_bjets)") - # .Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)") - .Alias("sel_bjets", "bjets") - .Define("nbjets", "FCCAnalyses::ReconstructedParticle::get_n(sel_bjets)") - .Define("b1_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[0]") - .Define("b1_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[0]") - .Define("b1_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[0]") - .Define("b1_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[0]") - .Define("b2_e", "FCCAnalyses::ReconstructedParticle::get_e(sel_bjets)[1]") - .Define("b2_pt", "FCCAnalyses::ReconstructedParticle::get_pt(sel_bjets)[1]") - .Define("b2_eta", "FCCAnalyses::ReconstructedParticle::get_eta(sel_bjets)[1]") - .Define("b2_phi", "FCCAnalyses::ReconstructedParticle::get_phi(sel_bjets)[1]") - - - # #H(bb) system - using the loose WP b-jets -> if the events has < 2 bjets these variables do not get filled! - .Define("bb_pairs_unmerged", "AnalysisFCChh::getPairs(sel_bjets)") #currently gets only leading pT pair, as a RecoParticlePair - #then merge the bb pair into one object and get its kinematic properties - .Define("bb_pairs", "AnalysisFCChh::merge_pairs(bb_pairs_unmerged)") #merge into one object to access inv masses etc - .Define("m_bb", "FCCAnalyses::ReconstructedParticle::get_mass(bb_pairs)") - - # Filter at least one candidate - .Filter("sel_bjets.size()>1") - # .Filter("sel_gamma.size()>1") - # .Filter("m_bb[0] < 200.") - # .Filter("m_bb[0] > 80.") - # .Filter("m_yy[0] < 180.") - # .Filter("m_yy[0] > 100.") - - ) - return dframe2 - - # Mandatory: output function, please make sure you return the branch list - # as a python list - def output(self): - ''' - Output variables which will be saved to output root file. - ''' - branch_list = [ - 'nbjets', - # 'm_bb', - 'ngamma', - 'm_yy', - 'm_bb', - 'b1_pt', 'b2_pt', 'b1_eta', 'b2_eta', - # 'selected_muons_pt', - # 'selected_muons_y', - # 'selected_muons_p', - # 'selected_muons_e', - # 'zed_leptonic_pt', - # 'zed_leptonic_m', - # 'zed_leptonic_charge', - # 'zed_leptonic_recoil_m' - ] - return branch_list diff --git a/python/process.py b/python/process.py index 2b74c5c771..f6902ae258 100644 --- a/python/process.py +++ b/python/process.py @@ -7,6 +7,7 @@ import json import glob import logging +from typing import Optional import urllib.request import yaml # type: ignore import ROOT # type: ignore @@ -30,6 +31,68 @@ def get_entries(inpath: str) -> int: return nevents +def get_entries_sow(infilepath: str, nevents_max: Optional[int] = None, get_local: bool = True, weight_name: str = "EventHeader.weight") -> tuple[int, int, float, float]: + ''' + Get number of original entries and number of actual entries in the file, as well as the sum of weights + ''' + + infile = ROOT.TFile.Open(infilepath) + infile.cd() + + processEvents = 0 + processSumOfWeights = 0. + try: + processEvents = infile.Get('eventsProcessed').GetVal() + except AttributeError: + print('----> Warning: Input file is missing information about ' # should these warnings be kept? + 'original number of events!') + try: + processSumOfWeights = infile.Get('SumOfWeights').GetVal() + except AttributeError: + print('----> Warning: Input file is missing information about ' + 'original sum of weights!') + + if not get_local: + return processEvents, None, processSumOfWeights, None + + eventsTTree = 0 + sumOfWeightsTTree = 0. + + # check for empty chunk (can this be improved? 
exceptions from RDF cannot be caught here, it seems)
+    tree = infile.Get("events")
+    if not tree:
+        print(f'----> Warning: Tree "events" not found in file {infilepath},'
+              ' possibly an empty chunk - continuing with the next one.')
+        infile.Close()
+        return processEvents, eventsTTree, processSumOfWeights, sumOfWeightsTTree
+
+    try:
+        # Use an RDataFrame here as well, so that the nevents_max
+        # restriction can easily be imposed on the local events
+        rdf_tmp = ROOT.ROOT.RDataFrame("events", infilepath)
+
+        if nevents_max:
+            rdf_tmp = rdf_tmp.Range(0, nevents_max)
+
+        eventsTTree = rdf_tmp.Count().GetValue()
+
+        ROOT.gROOT.SetBatch(True)
+        try:
+            histo = rdf_tmp.Histo1D(weight_name)
+            sumOfWeightsTTree = float(eventsTTree) * histo.GetMean()
+        except AttributeError:
+            print('----> Warning: Input file has no event weights.')
+    except AttributeError:
+        print('----> Error: Input file is missing events TTree! '
+              'Probably an empty chunk.')
+
+    infile.Close()
+
+    return processEvents, eventsTTree, processSumOfWeights, sumOfWeightsTTree
+
+
 def get_process_info(process: str,
                      prod_tag: str,
                      input_dir: str) -> tuple[list[str], list[int]]:
diff --git a/python/run_fccanalysis.py b/python/run_fccanalysis.py
index e6f4f86c0c..e6e13b3f3b 100644
--- a/python/run_fccanalysis.py
+++ b/python/run_fccanalysis.py
@@ -14,7 +14,7 @@
 import ROOT  # type: ignore

 from anascript import get_element, get_element_dict, get_attribute
-from process import get_process_info
+from process import get_process_info, get_entries_sow
 from frame import generate_graph

 LOGGER = logging.getLogger('FCCAnalyses.run')

@@ -455,7 +455,7 @@ def apply_filepath_rewrites(filepath: str) -> str:


 # _____________________________________________________________________________
-def run_local(args, analysis, infile_list):
+def run_local(args, analysis, infile_list, do_weighted=False):
     '''
     Run analysis locally.
     '''
@@ -467,38 +467,65 @@ def run_local(args, analysis, infile_list):
     nevents_orig = 0
     # The amount of events in the input file(s)
     nevents_local = 0
+
+    # Same for the sum of weights
+    if do_weighted:
+        sow_orig = 0.
+        sow_local = 0.
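+
+    # The loop below accumulates, per input file, the original ("processed")
+    # and local event counts and, if do_weighted is set, the corresponding
+    # sums of generator weights used later for normalisation.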
     for filepath in infile_list:
         filepath = apply_filepath_rewrites(filepath)
         file_list.push_back(filepath)
         info_msg += f'- {filepath}\t\n'
-        infile = ROOT.TFile.Open(filepath, 'READ')
-        try:
-            nevents_orig += infile.Get('eventsProcessed').GetVal()
-        except AttributeError:
-            pass
-        try:
-            nevents_local += infile.Get("events").GetEntries()
-        except AttributeError:
-            LOGGER.error('Input file:\n%s\nis missing events TTree!\n'
-                         'Aborting...', filepath)
-            infile.Close()
-            sys.exit(3)
-        infile.Close()
+        if do_weighted:
+            if args.nevents > 0:
+                nevts_param, nevts_tree, sow_param, sow_tree = \
+                    get_entries_sow(filepath, args.nevents)
+            else:
+                nevts_param, nevts_tree, sow_param, sow_tree = \
+                    get_entries_sow(filepath)
+
+            nevents_orig += nevts_param
+            nevents_local += nevts_tree
+            sow_orig += sow_param
+            sow_local += sow_tree
+        else:
+            infile = ROOT.TFile.Open(filepath, 'READ')
+            try:
+                nevents_orig += infile.Get('eventsProcessed').GetVal()
+            except AttributeError:
+                pass
+
+            try:
+                nevents_local += infile.Get("events").GetEntries()
+            except AttributeError:
+                LOGGER.error('Input file:\n%s\nis missing events TTree!\n'
+                             'Aborting...', filepath)
+                infile.Close()
+                sys.exit(3)
+            infile.Close()

-    LOGGER.info(info_msg)
-
     # Adjust number of events in case --nevents was specified
     if args.nevents > 0 and args.nevents < nevents_local:
         nevents_local = args.nevents
+
+    LOGGER.info(info_msg)

     if nevents_orig > 0:
         LOGGER.info('Number of events:\n\t- original: %s\n\t- local: %s',
                     f'{nevents_orig:,}', f'{nevents_local:,}')
+        if do_weighted:
+            LOGGER.info('Sum of weights:\n\t- original: %s\n\t- local: %s',
+                        f'{sow_orig:,}', f'{sow_local:,}')
     else:
         LOGGER.info('Number of local events: %s', f'{nevents_local:,}')
+        if do_weighted:
+            LOGGER.info('Local sum of weights: %s', f'{sow_local:,}')

     output_dir = get_attribute(analysis, 'output_dir', '')

     if not args.batch:
@@ -529,7 +556,7 @@ def run_local(args, analysis, infile_list):
         if nevents_orig > 0:
             info_msg += f'\nReduction factor total: {outn/nevents_orig}'
         info_msg += '\n'
-        info_msg += 80 * '='
+        info_msg += 80 * '='
         info_msg += '\n'
         LOGGER.info(info_msg)

@@ -540,8 +567,15 @@ def run_local(args, analysis, infile_list):
             'eventsProcessed',
             nevents_orig if nevents_orig != 0 else inn)
         param.Write()
-        param = ROOT.TParameter(int)('eventsSelected', outn)
+        # Should the number of weighted, selected events be stored as well?
+        param = ROOT.TParameter(int)('eventsSelected', outn)
         param.Write()
+
+        if do_weighted:
+            param_sow = ROOT.TParameter(float)(
+                'SumOfWeights',
+                sow_orig if sow_orig != 0 else sow_local)
+            param_sow.Write()
+
         outfile.Write()

     if args.bench:
@@ -590,6 +624,12 @@ def run_fccanalysis(args, analysis_module):
     if output_dir_eos is not None and not os.path.exists(output_dir_eos):
         os.system(f'mkdir -p {output_dir_eos}')

+    # Check if using weighted events is requested
+    do_weighted = get_attribute(analysis, 'do_weighted', False)
+
+    if do_weighted:
+        LOGGER.info('Using generator weights')
+
     # Check if test mode is specified, and if so run the analysis on it (this
     # will exit after)
     if args.test:
@@ -598,7 +638,7 @@ def run_fccanalysis(args, analysis_module):
         directory, _ = os.path.split(args.output)
         if directory:
             os.system(f'mkdir -p {directory}')
-        run_local(args, analysis, [testfile_path])
+        run_local(args, analysis, [testfile_path], do_weighted=do_weighted)
         sys.exit(0)

     # Check if files are specified, and if so run the analysis on it/them (this
@@ -608,7 +648,7 @@ def run_fccanalysis(args, analysis_module):
         directory, _ = os.path.split(args.output)
         if directory:
             os.system(f'mkdir -p {directory}')
-        run_local(args, analysis, args.files_list)
+        run_local(args, analysis, args.files_list, do_weighted=do_weighted)
         sys.exit(0)

     # Check if batch mode is available
@@ -693,11 +735,11 @@ def run_fccanalysis(args, analysis_module):
             LOGGER.info('Running locally...')
             if len(chunk_list) == 1:
                 args.output = f'{output_stem}.root'
-                run_local(args, analysis, chunk_list[0])
+                run_local(args, analysis, chunk_list[0],
+                          do_weighted=do_weighted)
             else:
                 for index, chunk in enumerate(chunk_list):
                     args.output = f'{output_stem}/chunk{index}.root'
-                    run_local(args, analysis, chunk)
+                    run_local(args, analysis, chunk, do_weighted=do_weighted)

     if len(process_list) == 0:
         LOGGER.warning('No files processed (process_list not found)!\n'
diff --git a/python/run_final_analysis.py b/python/run_final_analysis.py
index 7eaa7740b0..f9943a15d5 100644
--- a/python/run_final_analysis.py
+++ b/python/run_final_analysis.py
@@ -15,7 +15,7 @@
 import ROOT  # type: ignore
 import cppyy
 from anascript import get_element, get_attribute
-from process import get_process_dict
+from process import get_process_dict, get_entries_sow
 from frame import generate_graph

 LOGGER = logging.getLogger('FCCAnalyses.run_final')
@@ -230,6 +230,14 @@ def run(rdf_module, args) -> None:
     file_list = {}
     results = {}

+    # Check if using weighted events is requested
+    do_weighted = get_attribute(rdf_module, 'do_weighted', False)
+
+    if do_weighted:
+        LOGGER.info('Using generator weights')
+        sow_process = {}
+        sow_ttree = {}
+
     # Checking input directory
     input_dir = get_attribute(rdf_module, 'inputDir', '')
     if not input_dir:
@@ -265,27 +273,50 @@ def run(rdf_module, args) -> None:
     for process_name in process_list:
         process_events[process_name] = 0
         events_ttree[process_name] = 0
+        if do_weighted:
+            sow_process[process_name] = 0.
+            sow_ttree[process_name] = 0.
+
         file_list[process_name] = ROOT.vector('string')()
         infilepath = input_dir + process_name + '.root'  # input file
+
         if not os.path.isfile(infilepath):
             LOGGER.debug('File %s does not exist!\nTrying if it is a '
                          'directory as it might have been processed in batch.',
                          infilepath)
         else:
             LOGGER.info('Open file:\n  %s', infilepath)
-            process_events[process_name], events_ttree[process_name] = \
-                get_entries(infilepath)
+            if do_weighted:
+                process_events[process_name], events_ttree[process_name], \
+                    sow_process[process_name], sow_ttree[process_name] = \
+                    get_entries_sow(infilepath, weight_name="weight")
+            else:
+                process_events[process_name], events_ttree[process_name] = \
+                    get_entries(infilepath)
             file_list[process_name].push_back(infilepath)

         indirpath = input_dir + process_name
         if os.path.isdir(indirpath):
+            # Reset the nevts/sow counters to avoid double counting in case
+            # a single file of the same name (e.g. local test output) also
+            # exists in the same directory
+            process_events[process_name] = 0
+            events_ttree[process_name] = 0
+            if do_weighted:
+                sow_process[process_name] = 0.
+                sow_ttree[process_name] = 0.
+
             info_msg = f'Open directory {indirpath}'
             flist = glob.glob(indirpath + '/chunk*.root')
             for filepath in flist:
                 info_msg += '\n\t' + filepath
-                chunk_process_events, chunk_events_ttree = \
-                    get_entries(filepath)
+                if do_weighted:
+                    chunk_process_events, chunk_events_ttree, \
+                        chunk_sow_process, chunk_sow_ttree = \
+                        get_entries_sow(filepath, weight_name="weight")
+                    sow_process[process_name] += chunk_sow_process
+                    sow_ttree[process_name] += chunk_sow_ttree
+                else:
+                    chunk_process_events, chunk_events_ttree = \
+                        get_entries(filepath)
                 process_events[process_name] += chunk_process_events
                 events_ttree[process_name] += chunk_events_ttree
                 file_list[process_name].push_back(filepath)
@@ -300,6 +331,17 @@ def run(rdf_module, args) -> None:
         info_msg += f'\n\t- {process_name}: {n_events:,}'
     LOGGER.info(info_msg)

+    if do_weighted:
+        info_msg = 'Processed sum of weights:'
+        for process_name, sow in sow_process.items():
+            info_msg += f'\n\t- {process_name}: {sow:,}'
+        LOGGER.info(info_msg)
+        info_msg = 'Sum of weights in the TTree:'
+        for process_name, sow in sow_ttree.items():
+            info_msg += f'\n\t- {process_name}: {sow:,}'
+        LOGGER.info(info_msg)
+
     # Check if there are any histograms defined
     histo_list: dict[str, dict[str, any]] = get_attribute(rdf_module,
                                                           "histoList", {})
@@ -446,6 +488,7 @@ def run(rdf_module, args) -> None:
         # Now perform the loop and evaluate everything at once.
         LOGGER.info('Evaluating...')
         all_events_raw = dframe.Count().GetValue()
+
         LOGGER.info('Done')

         nevents_real += all_events_raw

@@ -453,10 +496,19 @@ def run(rdf_module, args) -> None:
         if do_scale:
             LOGGER.info('Scaling cut yields...')
-            all_events = all_events_raw * 1. * gen_sf * \
-                int_lumi / process_events[process_name]
-            uncertainty = ROOT.Math.sqrt(all_events_raw) * gen_sf * \
-                int_lumi / process_events[process_name]
+            if do_weighted:
+                # TODO: check that the weight column exists; this only works
+                # if a "weight" column is defined in the analysis script.
+                # Should the weight name be an argument?
+                all_events_weighted = dframe.Sum("weight").GetValue()
+                LOGGER.debug('Sum of weights after cuts: %s',
+                             all_events_weighted)
+                all_events = all_events_weighted * 1. * gen_sf * \
+                    int_lumi / sow_process[process_name]
+                uncertainty = ROOT.Math.sqrt(all_events_weighted) * gen_sf * \
+                    int_lumi / sow_process[process_name]
+            else:
+                all_events = all_events_raw * 1. * gen_sf * \
+                    int_lumi / process_events[process_name]
+                uncertainty = ROOT.Math.sqrt(all_events_raw) * gen_sf * \
+                    int_lumi / process_events[process_name]
         else:
             all_events = all_events_raw
             uncertainty = ROOT.Math.sqrt(all_events_raw)
@@ -518,8 +570,12 @@ def run(rdf_module, args) -> None:
                     hist_name = hist.GetName() + '_raw'
                     outfile.WriteObject(hist.GetValue(), hist_name)
                 if do_scale:
-                    hist.Scale(gen_sf * int_lumi /
-                               process_events[process_name])
+                    if do_weighted:
+                        hist.Scale(gen_sf * int_lumi /
+                                   sow_process[process_name])
+                    else:
+                        hist.Scale(gen_sf * int_lumi /
+                                   process_events[process_name])
                 outfile.WriteObject(hist.GetValue())

             # write all metadata info to the output file
             param = ROOT.TParameter(int)("eventsProcessed",
                                          process_events[process_name])
             outfile.WriteObject(param)

-            param = ROOT.TParameter(float)("sumOfWeights",
-                                           process_events[process_name])
-            outfile.WriteObject(param)
+            if do_weighted:
+                param = ROOT.TParameter(float)("sumOfWeights",
+                                               sow_process[process_name])
+            else:
+                param = ROOT.TParameter(float)("sumOfWeights",
+                                               process_events[process_name])
+            outfile.WriteObject(param)

             param = ROOT.TParameter(bool)("scaled",
                                           do_scale)
@@ -567,6 +629,22 @@ def run(rdf_module, args) -> None:
                              'events!', cut, process_name)
                 sys.exit(3)

+        # Also store the TParameters with the total number of events and the
+        # sum of weights in the output files holding the trees
+        LOGGER.info('Updating file %s', fout_list[i])
+        outfile = ROOT.TFile(fout_list[i], 'update')
+        param = ROOT.TParameter(int)('eventsProcessed',
+                                     process_events[process_name])
+        LOGGER.info('Number of events processed: %s',
+                    process_events[process_name])
+        param.Write()
+        if do_weighted:
+            param_sow = ROOT.TParameter(float)('SumOfWeights',
+                                               sow_process[process_name])
+            LOGGER.info('Sum of weights: %s', sow_process[process_name])
+            param_sow.Write()
+        outfile.Write()
+        outfile.Close()
+
     # Save results either to JSON or LaTeX tables
     save_results(results, rdf_module)
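
Note on the normalisation this patch introduces: with do_weighted enabled,
yields and histograms are scaled by gen_sf * int_lumi / sum_of_weights (the
'SumOfWeights' TParameter propagated from stage 1) instead of
gen_sf * int_lumi / events_processed. Below is a minimal, self-contained
sketch of that rule; the function and variable names are illustrative, not
FCCAnalyses API, and the uncertainty mirrors the patch's choice of the
square root of the selected sum of weights.

import math

def scale_weighted_yield(sum_w_selected: float, sow_processed: float,
                         gen_sf: float, int_lumi: float) -> tuple[float, float]:
    """Return (yield, stat. uncertainty) scaled to the target luminosity.

    sum_w_selected -- sum of generator weights of the selected events
    sow_processed  -- sum of weights of all processed events, as stored in
                      the 'SumOfWeights' TParameter written at stage 1
    gen_sf         -- generator-level scale factor (e.g. cross-section)
    int_lumi       -- integrated luminosity in matching units
    """
    # Normalisation factor shared by the yield and its uncertainty
    norm = gen_sf * int_lumi / sow_processed
    return sum_w_selected * norm, math.sqrt(sum_w_selected) * norm

# Example: 42.0 selected weights out of 500.0 processed, unit scale factor,
# 30 ab^-1 written as 3.0e7 pb^-1:
print(scale_weighted_yield(42.0, 500.0, 1.0, 3.0e7))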