-
Notifications
You must be signed in to change notification settings - Fork 42
/
JetToTau_MisID.py
208 lines (189 loc) · 10.8 KB
/
JetToTau_MisID.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# -*- coding: utf-8 -*-
# Author: Konstantinos Christoforou (February 2022)
# Description: Data-driven method to estimate mis-id taus (apply jet-to-tau FakeRates)
import os, re
import ROOT
from TauFW.Plotter.plot.string import invertcharge
from TauFW.Plotter.sample.SampleSet import LOG, SampleSet, Variable, deletehist, getcolor, makehistname
from TauFW.Plotter.sample.utils import LOG, STYLE, ensuredir, repkey, joincuts, joinweights, ensurelist, setera, getyear, loadmacro, Sel
import TauFW.Plotter.corrections.JetToTauFR.tools.fakeFactors as fakeFactors
def JetToTau_MisID(self, variables, selection, **kwargs):
"""Substract genuine-tau MC from data with loose-not-tightVsjet tau
and return a histogram of the difference, scaled by the appropriate FakeFactor """
verbosity = LOG.getverbosity(kwargs)
if verbosity>=2:
LOG.header("Estimating jet-to-tau misidentification for variables %s"%(', '.join(v.filename for v in variables)))
#LOG.verbose("\n>>> estimating QCD for variable %s"%(self.var),verbosity,level=2)
cuts_Tight = selection.selection
LooseNotTight_to_Tight = True
if LooseNotTight_to_Tight:
# cuts_LnotTFake = cuts_Tight.replace("id_tau >= 16","id_tau >= 1 && id_tau <= 16").replace("TauIsGenuine","!TauIsGenuine") ## for Loose-not-Tight histos,
# this should be like this, not as above, correct???
cuts_LnotTFake = cuts_Tight.replace("id_tau >= 8","id_tau >= 1 && id_tau < 8").replace("TauIsGenuine","!TauIsGenuine") ## for Loose-not-Tight histos,
else:
cuts_LnotTFake = cuts_Tight.replace("id_tau >= 16","id_tau >= 1").replace("TauIsGenuine","!TauIsGenuine") ## for Loose histos
# this should be like this, not as above, correct???
#cuts_LnotTFake = cuts_Tight.replace("id_tau >= 16","id_tau >= 1 && id_tau <= 16").replace("TauIsGenuine","!TauIsGenuine") ## for Loose histos
#isjetcat = re.search(r"(nc?btag|n[cf]?jets)",cuts_OS)
#relax = 'emu' in self.channel or isjetcat
samples = self.samples
name = kwargs.get('name', 'MisIDtau' )
title = kwargs.get('title', "Mis-ID #tau_h (Data)")
tag = kwargs.get('tag', "" )
#ratio_WJ_QCD = kwargs.get('ratio_WJ_QCD_SS', False )
#doRatio_WJ_QCD = isinstance(ratio_WJ_QCD, c_double )
weight = kwargs.get('weight', "" )
dataweight = kwargs.get('dataweight', "" )
replaceweight = kwargs.get('replaceweight', "" )
scale = kwargs.get('scale', None ) # OS/SS ratio
shift = kwargs.get('shift', 0.0 ) #+ self.shiftQCD # for systematics
#vetoRelax = kwargs.get('vetoRelax', relax )
#relax = kwargs.get('relax', relax ) #and not vetoRelax
#file = kwargs.get('saveto', None )
parallel = kwargs.get('parallel', False )
negthres = kwargs.get('negthres', 0.25 ) # threshold for warning about negative Mis-ID bins
## Calculate the Mis-ID #tau_h (Data) contribution
selections_LnotTFake_forFRbins = []
FRkeylist = []
maxPt = 120
etas = ["Barrel","Endcap"]#["Barrel","Endcap"] #[""]
prongs = [""]#["1prong","3prong"]
ptList = [20, 25 , 30 , 35, 40 , 50 , 70]
UseAbsoluteFR = True
#dirList = ["plots/UL2016_preVFP/mumutau/TauFakeRate_UL2016_preVFP_mumutau"]
#dirList = ["plots/UL2016_preVFP/eetau/TauFakeRate_UL2016_preVFP_eetau"]
#dirList = ["plots/UL2016_postVFP/mumutau/TauFakeRate_UL2016_postVFP_mumutau"]
#dirList = ["plots/UL2016_postVFP/eetau/TauFakeRate_UL2016_postVFP_eetau"]
#dirList = ["plots/UL2017/mumutau/TauFakeRate_UL2017_mumutau"]
#dirList = ["plots/UL2017/eetau/TauFakeRate_UL2017_eetau"]
#dirList = ["plots/UL2018/mumutau/TauFakeRate_UL2018_mumutau"]
#dirList = ["plots/UL2018/eetau/TauFakeRate_UL2018_eetau"]
#dirList = ["plots/UL2018/mumettau/TauFakeRate_UL2018_Medium_mumettau"]
dirList = ["plots/UL2018/mumettau/TauFakeRate_UL2018_Loose_mumettau"]
FRDict = {}
## Selections for prong-eta-pT bins, read the values for FakeRates for each prong-eta bin
## then read the value for each prong-eta-pT bin and store it in a dictionary with key the same
## string as the title of the specific prong-eta-pT selection
## prong bins
for prong in prongs:
name_ = "%s"%(prong)
tit_ = "%s"%(prong)
if prong == "1prong":
cut_ = "%s && TauDM<5"%(cuts_LnotTFake)
elif prong == "3prong":
cut_ = "%s && TauDM>5"%(cuts_LnotTFake)
else:
cut_ = cuts_LnotTFake
## eta bins
for eta in etas:
name__ = "%s_eta%s"%(name_,eta)
tit__ = "%s, %s"%(tit_,eta)
if UseAbsoluteFR:
if eta =="Barrel":
cut__ = "%s && abs(JetEta)<1.5"%(cut_)
elif eta =="Endcap":
cut__ = "%s && abs(JetEta)>1.5 && abs(JetEta)<2.4"%(cut_)
else:
cut__ = cut_
else:
if eta =="Barrel":
cut__ = "%s && abs(TauEta)<1.5"%(cut_)
elif eta =="Endcap":
cut__ = "%s && abs(TauEta)>1.5 && abs(TauEta)<2.4"%(cut_)
else:
cut__ = cut_
#FakeRates = fakeFactors.FakeFactors(dirList, "Data", prong, eta, None, "eetau") ## read the values for FakeRates for each prong-eta bin
#FakeRates = fakeFactors.FakeFactors(dirList, "Data", prong, eta, None, "mumutau") ## read the values for FakeRates for each prong-eta bin
FakeRates = fakeFactors.FakeFactors(dirList, "Data", prong, eta, None, "mumettau") ## read the values for FakeRates for each prong-eta bin
FRDictinPt = {}
FRDictinPt = FakeRates.valuesDict
FRinPtBins = list(FRDictinPt.values())[0]
for i, ptlow in enumerate(ptList):
if i<len(ptList)-1: # ptlow < pt < ptup
ptup = ptList[i+1]
name___ = "%s_pt%d-%d"%(name__,ptlow,ptup)
tit___ = "%s, %d < pt < %d GeV"%(tit__,ptlow,ptup)
if UseAbsoluteFR:
cut___ = "%s && %s<JetPt && JetPt<%s"%(cut__,ptlow,ptup)
else:
cut___ = "%s && %s<TauPt && TauPt<%s"%(cut__,ptlow,ptup)
else: # pt > ptlow (no upper pt cut)
name___ = "%s_pt%d_%s"%(name__,ptlow,maxPt)
tit___ = "%s, %d < pt < %d GeV"%(tit__,ptlow,maxPt)
if UseAbsoluteFR:
cut___ = "%s && %s<JetPt && JetPt<%s"%(cut__,ptlow,maxPt)
else:
cut___ = "%s && %s<TauPt && TauPt<%s"%(cut__,ptlow,maxPt)
selections_LnotTFake_forFRbins.append(Sel(name___,tit___,cut___)) # pt-DM-eta bins
FRDict[tit___] = FRinPtBins[i] #store the FR value in the dictionary using the title of the specific prong-eta-pT selection as key
misIDhists = [ ]
b_misIDhists_init = False
for selection in selections_LnotTFake_forFRbins:
FRDict_keyName = selection.title.replace("p_{T}","pt") ## there must be somewhere an automated replace of pt with p_{T} in the title of the selection
FakeRate = FRDict[FRDict_keyName] ## the FakeRate value for this specific pt-eta-prong bin, which will be used to weight the "fake" Data (=incl. Data - genuine MC)
## Initialize the Inclusive in pT_prong_eta bins data histos
if not b_misIDhists_init:
args = variables, cuts_LnotTFake
misIDhistos = self.gethists(*args,weight=weight,dataweight=dataweight,replaceweight=replaceweight,tag=tag,
task=" ", signal=False,split=False,blind=False,parallel=parallel,verbosity=verbosity-1)
for variable, datahist, exphists in misIDhistos:
exphist = exphists[0].Clone('MC_LnotTFake')
misIDhist = exphists[0].Clone(makehistname(variable.filename,name,tag)) # $VAR_$PROCESS$TAG
misIDhist.Reset()
misIDhist.SetTitle(title)
misIDhist.SetFillColor(ROOT.kOrange-2)
misIDhist.SetOption('HIST')
misIDhists.append(misIDhist)
b_misIDhists_init = True
##############################################################
# Now, for each variable, you should calculate the contribution of the fakes and add it to the above histos # Maybe use datahist instead of exphist to initialize above?
args_LnotTFake = variables, selection.selection #cuts_Tight
hists_LnotTFake = self.gethists(*args_LnotTFake,weight=weight,dataweight=dataweight,replaceweight=replaceweight,tag=tag,
task="Estimating jet-to-tau misIdentification", signal=False,split=False,blind=False,parallel=parallel,verbosity=verbosity-1)
args_LnotTGenuine = variables, selection.selection.replace("!TauIsGenuine","TauIsGenuine") #cuts_Tight
hists_LnotTGenuine = self.gethists(*args_LnotTGenuine,weight=weight,dataweight=dataweight,replaceweight=replaceweight,tag=tag,
task="Estimating jet-to-tau misIdentification", signal=False,split=False,blind=False,parallel=parallel,verbosity=verbosity-1)
j_counter = 0
for variable, datahist, exphists in hists_LnotTGenuine:
if not datahist:
LOG.warning("SampleSet.MisIDtau : No data to make DATA driven jet-to-tau misID!")
return None
# misIDhist
exphist = exphists[0].Clone('MC_LnotTFake')
for hist in exphists[1:]:
exphist.Add(hist)
misIDhist = exphists[0].Clone(makehistname(variable.filename,name,tag)) # $VAR_$PROCESS$TAG
misIDhist.Reset()
for variable_fake, datahist_fake, exphists_fake in hists_LnotTFake:
if variable_fake == variable:
misIDhist.Add(datahist_fake)
# misIDhist.Add(datahist)
misIDhist.Add(exphist,-1)
misIDhist.SetTitle(title)
misIDhist.SetFillColor(ROOT.kOrange-2)#getcolor('QCD'))
misIDhist.SetOption('HIST')
# misIDhists.append(misIDhist)
# ENSURE positive bins
nneg = 0
nbins = misIDhist.GetXaxis().GetNbins()+2 # include under-/overflow
for i in range(0,nbins):
binContent = misIDhist.GetBinContent(i)
if binContent<0:
misIDhist.SetBinContent(i,0)
misIDhist.SetBinError(i,1)
nneg += 1
if nbins and nneg/nbins>negthres:
LOG.warning("SampleSet.MisIDtau: %r has %d/%d>%.1f%% negative bins! Set to 0 +- 1."%(variable.name,nneg,nbins,100.0*negthres),pre=" ")
## So, at this point misIDhist has the LnotT Data minus the Genuine LnotT MC
## Now, you should just apply the tau FakeRates on the LnotT "fake" Data
if LooseNotTight_to_Tight:
FRweight = FakeRate/(1.0-FakeRate) ## you want the LooseNotTight -> Tight weight. FakeRate is the Loose->Tight one
else:
FRweight = FakeRate ## for Loose -> Tight
misIDhist.Scale(FRweight) # scale LnotT -> Tight
misIDhists[j_counter].Add(misIDhist)
j_counter += 1
# CLEAN
deletehist([datahist,exphist]+exphists)
return misIDhists
SampleSet.JetToTau_MisID = JetToTau_MisID # add as class method of SampleSet