Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAT Hackathon: retrieving normalization for skimmed files #78

Open
wants to merge 2 commits into
base: hackathon
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion PicoProducer/python/analysis/ModuleMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#from TauFW.PicoProducer.corrections.TrigObjMatcher import loadTriggerDataFromJSON, TrigObjMatcher
from TauPOG.TauIDSFs.TauIDSFTool import TauIDSFTool, TauESTool, campaigns


class ModuleMuTau(ModuleTauPair):

def __init__(self, fname, **kwargs):
Expand Down
24 changes: 13 additions & 11 deletions PicoProducer/python/analysis/ModuleTauPair.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ROOT import TFile, TTree
import sys, re
from math import sqrt, exp, cos
from ROOT import TLorentzVector, TVector3
from ROOT import TLorentzVector, TVector3, RDataFrame, TH1D
from PhysicsTools.NanoAODTools.postprocessing.framework.eventloop import Module
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection, Event
from TauFW.PicoProducer.corrections.PileupTool import *
Expand All @@ -13,6 +13,8 @@
from TauFW.PicoProducer.corrections.BTagTool import BTagWeightTool, BTagWPs
from TauFW.common.tools.log import header
from TauFW.PicoProducer.analysis.utils import ensurebranches, redirectbranch, deltaPhi, getmet, getmetfilters, correctmet, getlepvetoes, filtermutau
from TauFW.PicoProducer.analysis.utils import getTotalWeight, getNevt, save_norm_histo

__metaclass__ = type # to use super() with subclasses from CommonProducer
tauSFVersion = {
2016: '2016Legacy', 2017: '2017ReReco', 2018: '2018ReReco',
Expand Down Expand Up @@ -41,7 +43,7 @@ def __init__(self, fname, **kwargs):
self.ees = kwargs.get('ees', 1.0 ) # electron energy scale
self.tes = kwargs.get('tes', None ) # tau energy scale; if None, recommended values are applied
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmalvaso @pmastrap In general it's better to not change these default settings if it's for your own personal reasons, because it might break other people's setups. If you want to disable this, you need to do so via your sample list or the way you define the channel. (Anyway, I'll try to make this easier / more clear in the new configuration paradigm.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IzaakWN Should I restore these settings to their default values and create a new pull request, or is it fine like this for this time?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to just add a commit that undoes changing these defaults if that's okay with you?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's fine. I will do it asap

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IzaakWN I think I added the commit in the proper way, but please let me know if it's ok now

self.tessys = kwargs.get('tessys', None ) # vary TES: 'Up' or 'Down'
self.dotausfs = kwargs.get('dotausfs', True ) # compute tau SFs with TauIDSF tool
self.dotausfs = kwargs.get('dotausfs', True ) # compute tau SFs with TauIDSF tool
self.fes = kwargs.get('fes', None ) # electron-tau-fake energy scale: None, 'Up' or 'Down' (override with 'ltf=1')
self.ltf = kwargs.get('ltf', None ) # lepton-tau-fake energy scale
self.jtf = kwargs.get('jtf', 1.0 ) or 1.0 # jet-tau-fake energy scale
Expand All @@ -63,6 +65,7 @@ def __init__(self, fname, **kwargs):
self.verbosity = kwargs.get('verb', 0 ) # verbosity
self.jetCutPt = kwargs.get('jetpt', 30 )
self.doSVfit = kwargs.get('SVfit', False ) # add ditau mass estimated with SVfit (any(self.channel==c for c in ['etau','mutau','tautau']))
self.firstevt = kwargs.get('firstevt')
self.bjetCutEta = 2.4 if self.year==2016 else 2.5
self.isUL = 'UL' in self.era

Expand Down Expand Up @@ -145,16 +148,17 @@ def endJob(self):
if self.ismc:
self.btagTool.setDir(self.out.outfile,'btag')
self.out.endJob()



def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
"""Before processing a new file."""
sys.stdout.flush()

if self.firstevt == 0:
print("Creating Normalization Histogram")
save_norm_histo(inputFile, self.out.outfile)
# for v10
branchesV10 = [
('Muon_isTracker', [True]*32 ),
#('Electron_mvaFall17V217Iso', [1.]*32 ), #not available anymore
('Muon_isTracker', [True]*32 ),
#('Electron_mvaFall17V217Iso', [1.]*32 ), #not available anymore
('Electron_lostHits', [0]*32 ),
('Electron_mvaFall17V2Iso_WPL', 'Electron_mvaIso_WPL' ),
('Electron_mvaFall17V2Iso_WP80', 'Electron_mvaIso_WP80' ),
Expand Down Expand Up @@ -200,15 +204,13 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('HLT_IsoMu24', False ),
('HLT_IsoTkMu24', False ),
]

#check
fullbranchlist = inputTree.GetListOfBranches()
if 'Electron_mvaFall17Iso_WPL' not in fullbranchlist: #v10
ensurebranches(inputTree,branchesV10)
else: #v9
ensurebranches(inputTree,branches) # make sure Event object has these branches



def jetveto(self, event):
"""Return number of vetoed jets. Jet veto maps are mandatory for Run 3 analyses.
The safest procedure would be to veto events if ANY jet with a loose selection lies in the veto regions.
Expand Down Expand Up @@ -280,7 +282,7 @@ def fillEventBranches(self,event):
self.out.lumi[0] = event.luminosityBlock
self.out.npv[0] = event.PV_npvs
self.out.npv_good[0] = event.PV_npvsGood
self.out.metfilter[0] = self.filter(event)
self.out.metfilter[0] = self.filter(event)

if self.ismc:
###self.out.ngentauhads[0] = ngentauhads
Expand Down
76 changes: 71 additions & 5 deletions PicoProducer/python/analysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from TauFW.common.tools.file import ensuremodule as _ensuremodule
from TauFW.common.tools.log import Logger
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection, Event, Object
import numpy as np
LOG = Logger('Analysis')


Expand Down Expand Up @@ -328,7 +329,8 @@ def getmetfilters(era,isdata,verb=0):
if ('2017' in era or '2018' in era) and ('UL' not in era):
filters.extend(['Flag_ecalBadCalibFilterV2']) # under review for change in Ultra Legacy
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Run_2_recommendations
if '2022' in era or '2023' in era:
if '2022' in era or '2023' in era:
#from IPython import embed; embed()
filters.extend(['Flag_BadPFMuonDzFilter'])
filters.extend(['Flag_hfNoisyHitsFilter'])
filters.extend(['Flag_eeBadScFilter'])
Expand Down Expand Up @@ -386,6 +388,7 @@ def getlepvetoes(event, electrons, muons, taus, channel, era):
looseElectrons = [ ]
for electron in Collection(event,'Electron'):
if '2022' in era:
#from IPython import embed; embed()
electronIso90 = electron.mvaIso_Fall17V2_WP90
electronIso = electron.mvaIso_Fall17V2_WPL
elif '2023' in era:
Expand Down Expand Up @@ -474,7 +477,70 @@ def getTotalWeight(file, selections=[""]):
total_w[iSel] += w.GetValue()
return total_w

def getNevt(file):
  """Return the number of events stored in the 'Events' tree of an open ROOT file.

  Used for data files, where no generator weights exist and the event count
  itself is the normalization.

  Parameters
  ----------
  file : opened ROOT TFile (caller owns it; it is NOT closed here)

  Returns
  -------
  int : number of entries in the 'Events' tree.
  """
  # NOTE(review): this module previously contained two 'def getNevt'
  # definitions; the earlier RDataFrame-based one was dead code (immediately
  # shadowed by this one) and has been removed. TTree.GetEntries() reads the
  # entry count from tree metadata, which is far cheaper than counting
  # entries with an RDataFrame event loop.
  events_tree = file.Get("Events")
  return events_tree.GetEntries()

def getNevtNotSel(file):
  """Return the number of entries in the 'EventsNotSelected' tree of *file*.

  For skimmed samples, this tree holds the events that failed the skim
  selection; its entry count is needed to recover the full normalization.
  The caller owns *file*; it is not closed here.
  """
  skipped_tree = file.Get("EventsNotSelected")
  n_entries = skipped_tree.GetEntries()
  return n_entries


def save_norm_histo(inputFile, outputFile):
  """Build and write a 9-bin normalization histogram for a (skimmed) NanoAOD file.

  Bins 2/5/8 are filled from the 'Events' tree and bins 3/6/9 from the
  'EventsNotSelected' tree (skim rejects); bins 1/4/7 are their sums:
    1-3: event counts, 4-6: sum of genWeight, 7-9: sum of sign(genWeight).
  The histogram is written into *outputFile* via the current-directory
  mechanism (outputFile.cd() + Write()).

  NOTE(review): relies on TH1D and RDataFrame being in scope — the ROOT
  imports are not visible in this part of utils.py; confirm they are
  imported at module level.
  """
  # Create a histogram for storing the number of events and sum of weights
  hist = TH1D("n_events_and_sum_weights", "Number of Events and Sum of Weights", 9, 0, 9)
  # Set the bin labels (order must match the bin indices passed to
  # process_tree below and the totals filled into bins 1, 4, 7 at the end)
  bin_labels = [
    "Total Events", "Selected Events", "Not Selected Events",
    "Total Sum Weights","Sum Weights Selected", "Sum Weights Not Selected",
    "Total Sum of sign of Weights", "Sum of sign of Weights Selected", "Sum of sign of Weights Not Selected"
  ]
  for i, label in enumerate(bin_labels, 1):
    hist.GetXaxis().SetBinLabel(i, label)

  # Helper function to process trees: fills the event count into bin_evt,
  # sum of genWeight into bin_w, and sum of sign(genWeight) into bin_w_sign.
  # Closes over `keys`, `inputFile` and `hist` (keys is defined below,
  # before the first call).
  def process_tree(tree_name, bin_evt, bin_w, bin_w_sign):
    if tree_name in keys:
      n_evt = getNevt(inputFile) if tree_name == 'Events' else getNevtNotSel(inputFile)
      hist.SetBinContent(bin_evt, n_evt)
      tree = inputFile.Get(tree_name)
      tree.SetNotify(0) # This suppresses useless warnings
      df = RDataFrame(tree_name, inputFile)

      # Data files have no genWeight branch; leave the weight bins at 0.
      if not df.HasColumn("genWeight"):
        print(f"WARNING: Branch 'genWeight' not found in tree '{tree_name}', skipping this tree")
        return

      # sign(genWeight) as +/-1.0, for normalizations robust against
      # anomalously large weights
      df = df.Define('genWeightD', 'std::copysign<double>(1., genWeight)')
      hist.SetBinContent(bin_w, df.Sum('genWeight').GetValue())
      hist.SetBinContent(bin_w_sign, df.Sum('genWeightD').GetValue())
    else:
      print(f"ATTENTION: {tree_name} is not present, it will be skipped")

  # Top-level key names present in the input file (tree presence check)
  keys = [key.GetName() for key in inputFile.GetListOfKeys()]

  # Process 'Events' tree
  print("Processing Events")
  process_tree('Events', 2, 5, 8)

  # Process 'EventsNotSelected' tree
  print("Processing EventsNotSelected")
  process_tree('EventsNotSelected', 3, 6, 9)

  # Calculate totals (selected + not selected) into bins 1, 4 and 7
  hist.SetBinContent(1, hist.GetBinContent(2) + hist.GetBinContent(3))
  hist.SetBinContent(4, hist.GetBinContent(5) + hist.GetBinContent(6))
  hist.SetBinContent(7, hist.GetBinContent(8) + hist.GetBinContent(9))

  # Write the histogram to the output file
  outputFile.cd()
  hist.Write()

3 changes: 1 addition & 2 deletions PicoProducer/python/processors/picojob.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
parser.add_argument('-v', '--verbose', dest='verbosity', type=int, nargs='?', const=1, default=0)
args = parser.parse_args()


# SETTING
era = args.era # e.g. '2017', 'UL2017', ...
year = getyear(era) # integer year, e.g. 2017
Expand Down Expand Up @@ -83,7 +82,7 @@
json = getjson(era,dtype)

# EXTRA OPTIONS
kwargs = { 'era': era, 'year': year, 'dtype': dtype, 'compress': compress, 'verb': verbosity }
kwargs = { 'era': era, 'year': year, 'dtype': dtype, 'compress': compress, 'verb': verbosity, 'firstevt': firstevt}
for option in args.extraopts:
#for option in options.strip().split(' '):
assert '=' in option, "Extra option '%s' should contain '='! All: %s"%(option,args.extraopts)
Expand Down