-
Notifications
You must be signed in to change notification settings - Fork 42
/
HistSet.py
195 lines (175 loc) · 8.53 KB
/
HistSet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# -*- coding: utf-8 -*-
# Author: Izaak Neutelings (July 2020)
# Description: Container class for histogram lists to separate those of observed data from MC.
from TauFW.Plotter.plot.Stack import Stack, THStack, TH1
def integratehist(hist):
    """Return the integral of a histogram over its full bin range.

    The integration runs from bin 0 up to and including bin Nbins+1 on each
    axis (in ROOT's bin numbering these are the underflow and overflow bins).
    Histograms inheriting from 'TH2' are integrated over both x and y axes,
    all others over the x axis only.
    """
    is2d = hist.InheritsFrom('TH2')
    xlast = hist.GetXaxis().GetNbins() + 1  # last x bin to include
    if not is2d:
        return hist.Integral(0, xlast)
    ylast = hist.GetYaxis().GetNbins() + 1  # last y bin to include
    return hist.Integral(0, xlast, 0, ylast)
class HistSet(object):
    """Container class for histogram lists to separate those of observed data from MC.

    Attributes:
      data: single observed-data histogram (or None)
      exp:  list of expected (background) histograms, to be stacked
      sig:  list of signal histograms, to be overlaid
      var:  Variable object, used as default variable by getstack
      sel:  Selection object, used as default context by getstack
    """

    def __init__(self, data=None, exp=None, sig=None, var=None, sel=None):
        """Store the given histograms.

        If `data` is a dictionary { sample: hist }, the histograms are sorted
        by the sample's `isdata` and `issignal` flags into self.data, self.sig
        and self.exp (appended after any histograms passed via `sig`/`exp`).
        """
        self.data = data      # single data TH1D histogram
        self.exp = exp or [ ] # list of background TH1D histograms (Drell-Yan, ttbar, W+jets, ...), to be stacked
        self.sig = sig or [ ] # list of signal TH1D histograms (for new physics searches), to be overlaid
        self.var = var        # Variable object
        self.sel = sel        # Selection object for changing variable context
        if isinstance(data, dict): # data = { sample: hist } dictionary
            self.data = None
            for sample, hist in data.items():
                if sample.isdata: # observed data
                    self.data = hist
                elif sample.issignal: # signal
                    self.sig.append(hist)
                else: # exp (background)
                    self.exp.append(hist)

    def __iter__(self):
        """Iterate over all histograms: data first (if set), then exp, then sig."""
        if self.data:
            yield self.data
        for hist in self.exp:
            yield hist
        for hist in self.sig:
            yield hist

    def all(self):
        """Return list of all histograms, in the same order as iteration."""
        return list(iter(self))

    def display(self):
        """Print table of histogram yields (for debugging).

        One row is printed per histogram (data, exp, sig), plus a "total exp."
        row with the sums over the expected (background) histograms.
        Returns the tuple (total integral, total entries) of those sums.
        """
        TAB = LOG.table("%13.2f %13d %13.3f %r")
        TAB.printheader("Integral","Entries","Ave. weight","Hist name ")
        totint = 0 # summed integral of exp histograms
        totent = 0 # summed number of entries of exp histograms
        def row(hist):
            # (integral, entries, average weight, name); weight is -1 if no entries
            hint, hent = integratehist(hist), hist.GetEntries()
            return (hint, hent, hint/hent if hent!=0 else -1, hist.GetName())
        if self.data:
            TAB.printrow(*row(self.data))
        for hist in self.exp:
            hint, hent, wgt, name = row(hist)
            TAB.printrow(hint,hent,wgt,name)
            totint += hint
            totent += hent
        # column order must match the header: integral, entries, integral/entries
        TAB.printrow(totint,totent,totint/totent if totent!=0 else -1,"total exp.")
        for hist in self.sig:
            TAB.printrow(*row(hist))
        return totint, totent

    def getstack(self, var=None, context=None, **kwargs):
        """Create and return a Stack object.

        `var` defaults to self.var and `context` to self.sel. If a context is
        available and the variable has a `changecontext` method, it is called
        before the Stack is built. All keyword arguments (including 'verb')
        are passed on to Stack.
        """
        verb = kwargs.get('verb', 0)
        if var is None: # for initiation of Plot object
            var = self.var
        if self.sel is not None and context is None: # for setting context of Variable object
            context = self.sel
        LOG.verb("HistSet.getstack: context=%r, var=%r, self.var=%r, self.sel=%r"%(context,var,self.var,self.sel),verb,2)
        if context is not None and hasattr(var,'changecontext'):
            var.changecontext(context,verb=verb)
        stack = Stack(var,self.data,self.exp,self.sig,**kwargs)
        return stack # return Stack object

    def getTHStack(self, name='stack', **kwargs):
        """Create and return a THStack of the background histograms.

        Histograms are added in reversed order of self.exp; `name` is used as
        both the name and the title. Extra keyword arguments are ignored.
        """
        stack = THStack(name,name)
        for hist in reversed(self.exp):
            stack.Add(hist)
        return stack # return THStack object
class HistDict(object):
    """Container class for nested dictionaries of HistSets.

    HistDict is basically a set of nested dictionaries plus some helper functions:
      hist_dict = {
        selection1: {
          variable1: HistSet,
          variable2: HistSet,
        }
      }
    """

    def __init__(self,res_dict=None,**kwargs):
        """Create an empty container.
        NOTE(review): `res_dict` and extra keyword arguments are accepted but
        not used here - confirm whether callers expect them to be."""
        self._dict = { } # { selection : { variable: HistSet } }

    def __getitem__(self,selection):
        """Get nested dict for given selection key (do not create one if not exists).
        Returns None for an unknown selection instead of raising KeyError."""
        return self._dict.get(selection,None) # should be dict

    def __setitem__(self,selection,value):
        """Set nested dict of given selection key."""
        self._dict[selection] = value # should be dict

    def __len__(self):
        """Return number of selection x variable combinations."""
        return sum(len(variables) for variables in self._dict.values())

    def __iter__(self):
        """Return iterator over the selection keys."""
        return iter(self._dict)

    def display(self,nvars=1):
        """Print tables of histogram yields (for debugging).

        For each selection only the first `nvars` variables are shown;
        use nvars<1 to show all variables.
        """
        for sel in self._dict:
            # Kind of table whose header was printed last for this selection.
            # A header is (re)printed only when the kind of value changes, so the
            # table object always matches the row format being printed.
            headkind = None
            for i, var in enumerate(self._dict[sel]):
                if nvars>=1 and i>=nvars: break
                histset = self._dict[sel][var]
                if isinstance(var,tuple):
                    vstr = "(%r,%r)"%(var[0].filename,var[1].filename)
                elif hasattr(var,'filename'):
                    vstr = repr(var.filename)
                else:
                    vstr = repr(var)
                if isinstance(histset,dict) and any(isinstance(h,TH1) for h in histset.values()): # { sample: hist }
                    histset = HistSet(histset) # convert dictionary to HistSet for easy display
                if isinstance(histset,HistSet):
                    print(">>> Histogram yields for selection %r, variable %s:"%(sel.selection,vstr))
                    histset.display() # prints its own header
                    headkind = 'histset'
                elif isinstance(histset,TH1): # TH1 histogram
                    hist = histset
                    if headkind!='hist': # only print first time
                        print(">>> Histogram yields for selection %r, variable %s:"%(sel.selection,vstr))
                        TAB = LOG.table("%13.2f %13d %13.3f %r")
                        TAB.printheader("Integral","Entries","Ave. weight","Hist name ")
                        headkind = 'hist'
                    hint, hent = integratehist(hist), hist.GetEntries()
                    TAB.printrow(hint, hent, hint/hent if hent!=0 else -1, hist.GetName())
                elif isinstance(histset,dict): # assume { sample: number value }
                    for sample, value in histset.items():
                        if headkind!='samplenumber': # only print first time, not once per sample
                            print(">>> Number value:")
                            TAB = LOG.table("%15.4f %-15s %-15s %r")
                            TAB.printheader("Number","Sample","Variable","Selection"+' '*20)
                            headkind = 'samplenumber'
                        TAB.printrow(value,sample.name,vstr,sel.selection)
                else: # assume single number value
                    value = histset
                    if headkind!='number': # only print first time
                        print(">>> Number value:")
                        TAB = LOG.table("%15.4f %-15s %r")
                        TAB.printheader("Number","Variable","Selection"+' '*20)
                        headkind = 'number'
                    TAB.printrow(value,vstr,sel.selection)

    def insert(self,hist_dict,idx=-1,verb=0):
        """Insert histograms per selection/variable into the `exp` list of the
        corresponding HistSet.

        `hist_dict` is a nested dictionary { selection: { variable: hist } } whose
        keys must already exist in this HistDict (checked with assert).
        A negative `idx` counts from the end of the list, so idx=-1 appends.
        """
        for selection in hist_dict:
            assert selection in self._dict, "HistDict.insert: Unrecognized selection %r... hist_dict=%r, self._dict=%r"%(selection,hist_dict,self._dict)
            for variable in hist_dict[selection]:
                assert variable in self._dict[selection], "HistDict.insert: Unrecognized variable %r for selection%r... hist_dict=%r, self._dict=%r"%(
                    variable,selection,hist_dict,self._dict)
                hist = hist_dict[selection][variable] # TH1D histogram
                explist = self._dict[selection][variable].exp
                idx_ = idx if idx>=0 else len(explist)+1+idx # if index negative: count from end of list
                LOG.verb("HistDict.insert: Inserting=%r at index %r (%r)"%(hist,idx_,idx),verb,2)
                explist.insert(idx_,hist)

    def results(self,singlevar=False,singlesel=False,popvar=None):
        """Return simple nested dictionaries. { selection: { variable: HistSet } }

        Convert for just a single variable (singlevar==True) to { selection: HistSet },
        and/or for a single selection (singlesel==True) to { variable: HistSet },
        or to a single HistSet if both are True. If `popvar` is given, that
        variable key is first removed from every selection.
        NOTE: this operates on the internal dictionary in place (no copy is
        made), so `popvar` and `singlevar` modify this HistDict.
        """
        results = self._dict
        if popvar is not None: # remove variable from dictionary (used by Sample.getmean)
            for sel in results:
                results[sel].pop(popvar,None)
        if singlevar: # convert result to { selection: HistSet }
            # iterate over a copy of the keys: empty selections are removed inside
            # the loop, which is not allowed while iterating over the dict itself
            for sel in list(results.keys()):
                if len(results[sel])>=2:
                    LOG.warn("HistDict.results: singlevar=%r, singlesel=%r, but found more than one variable key in self._dict=%r"%(
                        singlevar,singlesel,self._dict))
                if len(results[sel])==0: # no variables/hists
                    results.pop(sel) # delete nested dictionary
                else:
                    varkey = list(results[sel].keys())[0] # get first (and only?) key
                    results[sel] = results[sel][varkey]
        if singlesel: # convert result to { variable: HistSet } or if singlevar==True: a single HistSet
            if len(results)>=2:
                LOG.warn("HistDict.results: singlevar=%r, singlesel=%r, but found more than one selection key in self._dict=%r"%(
                    singlevar,singlesel,self._dict))
            if len(results)==0: # no selections
                results = HistSet() # empty hist set
            else:
                selkey = list(results.keys())[0] # get first (and only?) key
                results = results[selkey] # get single nested dictionary or HistSet
        return results
from TauFW.Plotter.sample.utils import LOG