Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAT Hackathon: retrieving normalization for skimmed files #78

Open
wants to merge 2 commits into
base: hackathon
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion PicoProducer/python/analysis/ModuleMuTau.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#from TauFW.PicoProducer.corrections.TrigObjMatcher import loadTriggerDataFromJSON, TrigObjMatcher
from TauPOG.TauIDSFs.TauIDSFTool import TauIDSFTool, TauESTool, campaigns


class ModuleMuTau(ModuleTauPair):

def __init__(self, fname, **kwargs):
Expand Down
24 changes: 13 additions & 11 deletions PicoProducer/python/analysis/ModuleTauPair.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ROOT import TFile, TTree
import sys, re
from math import sqrt, exp, cos
from ROOT import TLorentzVector, TVector3
from ROOT import TLorentzVector, TVector3, RDataFrame, TH1D
from PhysicsTools.NanoAODTools.postprocessing.framework.eventloop import Module
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection, Event
from TauFW.PicoProducer.corrections.PileupTool import *
Expand All @@ -13,6 +13,8 @@
from TauFW.PicoProducer.corrections.BTagTool import BTagWeightTool, BTagWPs
from TauFW.common.tools.log import header
from TauFW.PicoProducer.analysis.utils import ensurebranches, redirectbranch, deltaPhi, getmet, getmetfilters, correctmet, getlepvetoes, filtermutau
from TauFW.PicoProducer.analysis.utils import getTotalWeight, getNevt, save_norm_histo

__metaclass__ = type # to use super() with subclasses from CommonProducer
tauSFVersion = {
2016: '2016Legacy', 2017: '2017ReReco', 2018: '2018ReReco',
Expand Down Expand Up @@ -41,7 +43,7 @@ def __init__(self, fname, **kwargs):
self.ees = kwargs.get('ees', 1.0 ) # electron energy scale
self.tes = kwargs.get('tes', None ) # tau energy scale; if None, recommended values are applied
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmalvaso @pmastrap In general it's better to not change these default settings if it's for your own personal reasons, because it might break other people's setups. If you want to disable this, you need to do so via your sample list or the way you define the channel. (Anyway, I'll try to make this easier / more clear in the new configuration paradigm.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IzaakWN Should I restore these settings to their default values and create a new pull request, or is it fine like this for this time?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to just add a commit that undoes changing these defaults if that's okay with you?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's fine. I will do it asap

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IzaakWN I think I added the commit in the proper way, but please let me know if it's ok now

self.tessys = kwargs.get('tessys', None ) # vary TES: 'Up' or 'Down'
self.dotausfs = kwargs.get('dotausfs', True ) # compute tau SFs with TauIDSF tool
self.dotausfs = kwargs.get('dotausfs', True ) # compute tau SFs with TauIDSF tool
self.fes = kwargs.get('fes', None ) # electron-tau-fake energy scale: None, 'Up' or 'Down' (override with 'ltf=1')
self.ltf = kwargs.get('ltf', None ) # lepton-tau-fake energy scale
self.jtf = kwargs.get('jtf', 1.0 ) or 1.0 # jet-tau-fake energy scale
Expand All @@ -63,6 +65,7 @@ def __init__(self, fname, **kwargs):
self.verbosity = kwargs.get('verb', 0 ) # verbosity
self.jetCutPt = kwargs.get('jetpt', 30 )
self.doSVfit = kwargs.get('SVfit', False ) # add ditau mass estimated with SVfit (any(self.channel==c for c in ['etau','mutau','tautau']))
self.firstevt = kwargs.get('firstevt')
self.bjetCutEta = 2.4 if self.year==2016 else 2.5
self.isUL = 'UL' in self.era

Expand Down Expand Up @@ -145,16 +148,17 @@ def endJob(self):
if self.ismc:
self.btagTool.setDir(self.out.outfile,'btag')
self.out.endJob()



def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
"""Before processing a new file."""
sys.stdout.flush()

if self.firstevt == 0:
print("Creating Normalization Histogram")
save_norm_histo(inputFile, self.out.outfile)
# for v10
branchesV10 = [
('Muon_isTracker', [True]*32 ),
#('Electron_mvaFall17V217Iso', [1.]*32 ), #not available anymore
('Muon_isTracker', [True]*32 ),
#('Electron_mvaFall17V217Iso', [1.]*32 ), #not available anymore
('Electron_lostHits', [0]*32 ),
('Electron_mvaFall17V2Iso_WPL', 'Electron_mvaIso_WPL' ),
('Electron_mvaFall17V2Iso_WP80', 'Electron_mvaIso_WP80' ),
Expand Down Expand Up @@ -200,15 +204,13 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
('HLT_IsoMu24', False ),
('HLT_IsoTkMu24', False ),
]

#check
fullbranchlist = inputTree.GetListOfBranches()
if 'Electron_mvaFall17Iso_WPL' not in fullbranchlist: #v10
ensurebranches(inputTree,branchesV10)
else: #v9
ensurebranches(inputTree,branches) # make sure Event object has these branches



def jetveto(self, event):
"""Return number of vetoed jets. Jet veto maps are mandatory for Run 3 analyses.
The safest procedure would be to veto events if ANY jet with a loose selection lies in the veto regions.
Expand Down Expand Up @@ -280,7 +282,7 @@ def fillEventBranches(self,event):
self.out.lumi[0] = event.luminosityBlock
self.out.npv[0] = event.PV_npvs
self.out.npv_good[0] = event.PV_npvsGood
self.out.metfilter[0] = self.filter(event)
self.out.metfilter[0] = self.filter(event)

if self.ismc:
###self.out.ngentauhads[0] = ngentauhads
Expand Down
76 changes: 71 additions & 5 deletions PicoProducer/python/analysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from TauFW.common.tools.file import ensuremodule as _ensuremodule
from TauFW.common.tools.log import Logger
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection, Event, Object
import numpy as np
LOG = Logger('Analysis')


Expand Down Expand Up @@ -328,7 +329,8 @@ def getmetfilters(era,isdata,verb=0):
if ('2017' in era or '2018' in era) and ('UL' not in era):
filters.extend(['Flag_ecalBadCalibFilterV2']) # under review for change in Ultra Legacy
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Run_2_recommendations
if '2022' in era or '2023' in era:
if '2022' in era or '2023' in era:
#from IPython import embed; embed()
filters.extend(['Flag_BadPFMuonDzFilter'])
filters.extend(['Flag_hfNoisyHitsFilter'])
filters.extend(['Flag_eeBadScFilter'])
Expand Down Expand Up @@ -386,6 +388,7 @@ def getlepvetoes(event, electrons, muons, taus, channel, era):
looseElectrons = [ ]
for electron in Collection(event,'Electron'):
if '2022' in era:
#from IPython import embed; embed()
electronIso90 = electron.mvaIso_Fall17V2_WP90
electronIso = electron.mvaIso_Fall17V2_WPL
elif '2023' in era:
Expand Down Expand Up @@ -474,7 +477,70 @@ def getTotalWeight(file, selections=[""]):
total_w[iSel] += w.GetValue()
return total_w

def getNevt(file):
  """Return the number of events stored in the 'Events' tree of an open ROOT file.

  Used for data files, where no generator weights exist and the event count
  itself is the normalization.

  Parameters
  ----------
  file : opened ROOT TFile (caller owns it; it is NOT closed here)

  Returns
  -------
  int : number of entries in the 'Events' tree.
  """
  # NOTE(review): this module previously contained two 'def getNevt'
  # definitions; the earlier RDataFrame-based one was dead code (immediately
  # shadowed by this one) and has been removed. TTree.GetEntries() reads the
  # entry count from tree metadata, which is far cheaper than counting
  # entries with an RDataFrame event loop.
  events_tree = file.Get("Events")
  return events_tree.GetEntries()

def getNevtNotSel(file):
  """Return the number of entries in the 'EventsNotSelected' tree of *file*.

  For skimmed samples, this tree holds the events that failed the skim
  selection; its entry count is needed to recover the full normalization.
  The caller owns *file*; it is not closed here.
  """
  skipped_tree = file.Get("EventsNotSelected")
  n_entries = skipped_tree.GetEntries()
  return n_entries


def save_norm_histo(inputFile, outputFile):
  """Build and write a 9-bin normalization histogram for a (skimmed) NanoAOD file.

  Bins 2/5/8 are filled from the 'Events' tree and bins 3/6/9 from the
  'EventsNotSelected' tree (skim rejects); bins 1/4/7 are their sums:
    1-3: event counts, 4-6: sum of genWeight, 7-9: sum of sign(genWeight).
  The histogram is written into *outputFile* via the current-directory
  mechanism (outputFile.cd() + Write()).

  NOTE(review): relies on TH1D and RDataFrame being in scope — the ROOT
  imports are not visible in this part of utils.py; confirm they are
  imported at module level.
  """
  # Create a histogram for storing the number of events and sum of weights
  hist = TH1D("n_events_and_sum_weights", "Number of Events and Sum of Weights", 9, 0, 9)
  # Set the bin labels (order must match the bin indices passed to
  # process_tree below and the totals filled into bins 1, 4, 7 at the end)
  bin_labels = [
    "Total Events", "Selected Events", "Not Selected Events",
    "Total Sum Weights","Sum Weights Selected", "Sum Weights Not Selected",
    "Total Sum of sign of Weights", "Sum of sign of Weights Selected", "Sum of sign of Weights Not Selected"
  ]
  for i, label in enumerate(bin_labels, 1):
    hist.GetXaxis().SetBinLabel(i, label)

  # Helper function to process trees: fills the event count into bin_evt,
  # sum of genWeight into bin_w, and sum of sign(genWeight) into bin_w_sign.
  # Closes over `keys`, `inputFile` and `hist` (keys is defined below,
  # before the first call).
  def process_tree(tree_name, bin_evt, bin_w, bin_w_sign):
    if tree_name in keys:
      n_evt = getNevt(inputFile) if tree_name == 'Events' else getNevtNotSel(inputFile)
      hist.SetBinContent(bin_evt, n_evt)
      tree = inputFile.Get(tree_name)
      tree.SetNotify(0) # This suppresses useless warnings
      df = RDataFrame(tree_name, inputFile)

      # Data files have no genWeight branch; leave the weight bins at 0.
      if not df.HasColumn("genWeight"):
        print(f"WARNING: Branch 'genWeight' not found in tree '{tree_name}', skipping this tree")
        return

      # sign(genWeight) as +/-1.0, for normalizations robust against
      # anomalously large weights
      df = df.Define('genWeightD', 'std::copysign<double>(1., genWeight)')
      hist.SetBinContent(bin_w, df.Sum('genWeight').GetValue())
      hist.SetBinContent(bin_w_sign, df.Sum('genWeightD').GetValue())
    else:
      print(f"ATTENTION: {tree_name} is not present, it will be skipped")

  # Top-level key names present in the input file (tree presence check)
  keys = [key.GetName() for key in inputFile.GetListOfKeys()]

  # Process 'Events' tree
  print("Processing Events")
  process_tree('Events', 2, 5, 8)

  # Process 'EventsNotSelected' tree
  print("Processing EventsNotSelected")
  process_tree('EventsNotSelected', 3, 6, 9)

  # Calculate totals (selected + not selected) into bins 1, 4 and 7
  hist.SetBinContent(1, hist.GetBinContent(2) + hist.GetBinContent(3))
  hist.SetBinContent(4, hist.GetBinContent(5) + hist.GetBinContent(6))
  hist.SetBinContent(7, hist.GetBinContent(8) + hist.GetBinContent(9))

  # Write the histogram to the output file
  outputFile.cd()
  hist.Write()

3 changes: 1 addition & 2 deletions PicoProducer/python/processors/picojob.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
parser.add_argument('-v', '--verbose', dest='verbosity', type=int, nargs='?', const=1, default=0)
args = parser.parse_args()


# SETTING
era = args.era # e.g. '2017', 'UL2017', ...
year = getyear(era) # integer year, e.g. 2017
Expand Down Expand Up @@ -83,7 +82,7 @@
json = getjson(era,dtype)

# EXTRA OPTIONS
kwargs = { 'era': era, 'year': year, 'dtype': dtype, 'compress': compress, 'verb': verbosity }
kwargs = { 'era': era, 'year': year, 'dtype': dtype, 'compress': compress, 'verb': verbosity, 'firstevt': firstevt}
for option in args.extraopts:
#for option in options.strip().split(' '):
assert '=' in option, "Extra option '%s' should contain '='! All: %s"%(option,args.extraopts)
Expand Down