From 6e765c96988ae7285422322a487e2d7ddb7a36b3 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Mon, 4 Sep 2023 17:55:16 -0500 Subject: [PATCH] significance plots --- .../postprocessing/PostProcessVBF.ipynb | 71 +++++------ src/HHbbVV/postprocessing/plotting.py | 63 +++++++++- src/HHbbVV/postprocessing/postprocessing.py | 117 +++++++----------- src/HHbbVV/postprocessing/utils.py | 54 ++++++-- 4 files changed, 174 insertions(+), 131 deletions(-) diff --git a/src/HHbbVV/postprocessing/PostProcessVBF.ipynb b/src/HHbbVV/postprocessing/PostProcessVBF.ipynb index 294a42ee..80da5519 100644 --- a/src/HHbbVV/postprocessing/PostProcessVBF.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessVBF.ipynb @@ -12,7 +12,7 @@ "import corrections\n", "from collections import OrderedDict\n", "\n", - "from utils import CUT_MAX_VAL\n", + "from utils import CUT_MAX_VAL, ShapeVar\n", "from hh_vars import (\n", " years,\n", " data_key,\n", @@ -150,46 +150,32 @@ "outputs": [], "source": [ "# {var: (bins, label)}\n", - "control_plot_vars = {\n", - " # \"MET_pt\": ([50, 0, 300], r\"$p^{miss}_T$ (GeV)\"),\n", - " \"DijetEta\": ([30, -8, 8], r\"$\\eta^{jj}$\"),\n", - " \"DijetPt\": ([30, 0, 750], r\"$p_T^{jj}$ (GeV)\"),\n", - " \"DijetMass\": (\n", - " # list(range(800, 1400, 100)) + [1400, 1600, 2000, 3000, 4400],\n", - " [30, 600, 4500],\n", - " r\"$m^{jj}$ (GeV)\",\n", - " ),\n", - " \"bbFatJetEta\": ([30, -2.4, 2.4], r\"$\\eta^{bb}$\"),\n", - " \"bbFatJetPt\": ([30, 300, 1500], r\"$p^{bb}_T$ (GeV)\"),\n", - " \"bbFatJetParticleNetMass\": ([20, 50, 250], r\"$m^{bb}_{reg}$ (GeV)\"),\n", - " # \"bbFatJetMsd\": ([50, 0, 300], r\"$m^{bb}_{msd}$ (GeV)\"),\n", - " # \"bbFatJetParticleNetMD_Txbb\": ([50, 0.8, 1], r\"$T^{bb}_{Xbb}$\"),\n", - " \"VVFatJetEta\": ([30, -2.4, 2.4], r\"$\\eta^{VV}$\"),\n", - " \"VVFatJetPt\": ([30, 300, 1500], r\"$p^{VV}_T$ (GeV)\"),\n", - " \"VVFatJetParticleNetMass\": (\n", - " # list(range(50, 110, 10)) + list(range(110, 200, 15)) + [200, 220, 250],\n", - " [20, 50, 250],\n", - " r\"$m^{VV}_{reg}$ (GeV)\",\n", - " ),\n", - " # \"VVFatJetMsd\": ([40, 50, 250], r\"$m^{VV}_{msd}$ (GeV)\"),\n", - " # \"VVFatJetParticleNet_Th4q\": ([50, 0, 1], r\"Prob($H \\to 4q$) vs Prob(QCD) (Non-MD)\"),\n", - " # \"VVFatJetParTMD_THWW4q\": (\n", - " # [50, 0, 1],\n", - " # r\"Prob($H \\to VV \\to 4q$) vs Prob(QCD) (Mass-Decorrelated)\",\n", - " # ),\n", - " # \"VVFatJetParTMD_probT\": ([50, 0, 1], r\"Prob(Top) (Mass-Decorrelated)\"),\n", - " # \"VVFatJetParTMD_THWWvsT\": (\n", - " # [50, 0, 1],\n", - " # r\"$T^{VV}_{HWW}$\",\n", - " # ),\n", - " # \"bbFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{bb}_T / p_T^{jj}$\"),\n", - " # \"VVFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{VV}_T / p_T^{jj}$\"),\n", - " # \"VVFatJetPtOverbbFatJetPt\": ([50, 0.4, 2.0], r\"$p^{VV}_T / p^{bb}_T$\"),\n", - " # \"nGoodMuons\": ([3, 0, 3], r\"# of Muons\"),\n", - " # \"nGoodElectrons\": ([3, 0, 3], r\"# of Electrons\"),\n", - " # \"nGoodJets\": ([5, 0, 5], r\"# of AK4 B-Jets\"),\n", - " # \"BDTScore\": ([50, 0, 1], r\"BDT Score\"),\n", - "}\n", + "control_plot_vars = [\n", + " # ShapeVar(var=\"MET_pt\", label=r\"$p^{miss}_T$ (GeV)\", bins=[50, 0, 300]),\n", + " # ShapeVar(var=\"DijetEta\", label=r\"$\\eta^{jj}$\", bins=[30, -8, 8]),\n", + " # ShapeVar(var=\"DijetPt\", label=r\"$p_T^{jj}$ (GeV)\", bins=[30, 0, 750]),\n", + " # ShapeVar(var=\"DijetMass\", label=r\"$m^{jj}$ (GeV)\", bins=[30, 600, 4000]),\n", + " # ShapeVar(var=\"bbFatJetEta\", label=r\"$\\eta^{bb}$\", bins=[30, -2.4, 2.4]),\n", + " ShapeVar(var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"),\n", + " ShapeVar(var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250], significance_dir=\"bin\"),\n", + " # ShapeVar(var=\"bbFatJetMsd\", label=r\"$m^{bb}_{msd}$ (GeV)\", bins=[50, 0, 300]),\n", + " # ShapeVar(var=\"bbFatJetParticleNetMD_Txbb\", label=r\"$T^{bb}_{Xbb}$\", bins=[50, 0.8, 1]),\n", + " # ShapeVar(var=\"VVFatJetEta\", label=r\"$\\eta^{VV}$\", bins=[30, -2.4, 2.4]),\n", + " # ShapeVar(var=\"VVFatJetPt\", label=r\"$p^{VV}_T$ (GeV)\", bins=[30, 300, 1500]),\n", + " # ShapeVar(var=\"VVParticleNetMass\", label=r\"$m^{VV}_{reg}$ (GeV)\", bins=[20, 50, 250]),\n", + " # ShapeVar(var=\"VVFatJetMsd\", label=r\"$m^{VV}_{msd}$ (GeV)\", bins=[40, 50, 250]),\n", + " # ShapeVar(var=\"VVFatJetParticleNet_Th4q\", label=r\"Prob($H \\to 4q$) vs Prob(QCD) (Non-MD)\", bins=[50, 0, 1]),\n", + " # ShapeVar(var=\"VVFatJetParTMD_THWW4q\", label=r\"Prob($H \\to VV \\to 4q$) vs Prob(QCD) (Mass-Decorrelated)\", bins=[50, 0, 1]),\n", + " # ShapeVar(var=\"VVFatJetParTMD_probT\", label=r\"Prob(Top) (Mass-Decorrelated)\", bins=[50, 0, 1]),\n", + " # ShapeVar(var=\"VVFatJetParTMD_THWWvsT\", label=r\"$T^{VV}_{HWW}$\", bins=[50, 0, 1]),\n", + " # ShapeVar(var=\"bbFatJetPtOverDijetPt\", label=r\"$p^{bb}_T / p_T^{jj}$\", bins=[50, 0, 40]),\n", + " # ShapeVar(var=\"VVFatJetPtOverDijetPt\", label=r\"$p^{VV}_T / p_T^{jj}$\", bins=[50, 0, 40]),\n", + " # ShapeVar(var=\"VVFatJetPtOverbbFatJetPt\", label=r\"$p^{VV}_T / p^{bb}_T$\", bins=[50, 0.4, 2.0]),\n", + " # ShapeVar(var=\"nGoodMuons\", label=r\"# of Muons\", bins=[3, 0, 3]),\n", + " # ShapeVar(var=\"nGoodElectrons\", label=r\"# of Electrons\", bins=[3, 0, 3]),\n", + " # ShapeVar(var=\"nGoodJets\", label=r\"# of AK4 B-Jets\", bins=[5, 0, 5]),\n", + " # ShapeVar(var=\"BDTScore\", label=r\"BDT Score\", bins=[50, 0, 1]),\n", + "]\n", "\n", "hists = postprocessing.control_plots(\n", " events_dict,\n", @@ -206,8 +192,9 @@ " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 2e3,\n", " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 2e3,\n", " },\n", + " plot_significance=True,\n", " show=True,\n", - " log=\"both\",\n", + " log=False,\n", ")" ] }, diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index 20792fad..693a3bd9 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -189,6 +189,14 @@ def _fill_error(ax, edges, down, up, scale=1): ) +def _asimov_significance(s, b): + """Asimov estimate of discovery significance (with no systematic uncertainties). + See e.g. https://www.pp.rhul.ac.uk/~cowan/atlas/cowan_atlas_15feb11.pdf. + Or for more explanation: https://www.pp.rhul.ac.uk/~cowan/stat/cowan_munich16.pdf + """ + return np.sqrt(2 * ((s + b) * np.log(1 + (s / b)) - s)) + + def ratioHistPlot( hists: Hist, year: str, @@ -210,6 +218,8 @@ def ratioHistPlot( log: bool = False, ratio_ylims: List[float] = [0, 2], divide_bin_width: bool = False, + plot_significance: bool = False, + significance_dir: str = "right", axrax: Tuple = None, ): """ @@ -243,6 +253,8 @@ def ratioHistPlot( bg_order (List[str]): order in which to plot backgrounds ratio_ylims (List[float]): y limits on the ratio plots divide_bin_width (bool): divide yields by the bin width (for resonant fit regions) + plot_significance (bool): plot Asimov significance below ratio plot + significance_dir (str): "Direction" for significance. i.e. a > cut ("right"), a < cut ("left"), or per-bin ("bin"). axrax (Tuple): optionally input ax and rax instead of creating new ones """ # copy hists and bg_keys so input objects are not changed @@ -258,13 +270,20 @@ def ratioHistPlot( hists, data_err = _divide_bin_widths(hists, data_err) # set up plots - if axrax is None: + if axrax is not None: + if plot_significance: + raise RuntimeError("Significance plots with input axes not implemented yet.") + + ax, rax = axrax + ax.sharex(rax) + elif plot_significance: + fig, (ax, rax, sax) = plt.subplots( + 3, 1, figsize=(12, 18), gridspec_kw=dict(height_ratios=[3, 1, 1], hspace=0), sharex=True + ) + else: fig, (ax, rax) = plt.subplots( 2, 1, figsize=(12, 14), gridspec_kw=dict(height_ratios=[3, 1], hspace=0), sharex=True ) - else: - ax, rax = axrax - ax.sharex(rax) # plot histograms y_label = r"Events / Bin Width (GeV$^{-1}$)" if divide_bin_width else "Events" @@ -358,6 +377,42 @@ def ratioHistPlot( rax.set_ylim(ratio_ylims) rax.grid() + if plot_significance: + bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]).values() + sigs = [pre_divide_hists[sig_key, :].values() for sig_key in sig_scale_dict] + + if significance_dir == "left": + bg_tot = np.cumsum(bg_tot[::-1])[::-1] + sigs = [np.cumsum(sig[::-1])[::-1] for sig in sigs] + sax.set_ylabel(r"Asimov Sign. for $\leq$ Cuts") + elif significance_dir == "right": + bg_tot = np.cumsum(bg_tot) + sigs = [np.cumsum(sig) for sig in sigs] + sax.set_ylabel(r"Asimov Sign. for $\geq$ Cuts") + elif significance_dir == "bin": + sax.set_ylabel("Asimov Sign. per Bin") + else: + raise RuntimeError( + 'Invalid value for ``significance_dir``. Options are ["left", "right", "bin"].' + ) + + edges = pre_divide_hists.axes[1].edges + hep.histplot( + [(_asimov_significance(sig, bg_tot), edges) for sig in sigs], + ax=sax, + histtype="step", + label=[ + sig_key if sig_key not in sample_label_map else sample_label_map[sig_key] + for sig_key in sig_scale_dict + ], + color=sig_colours[: len(sig_keys)], + ) + + sax.legend(fontsize=12) + sax.set_yscale("log") + sax.set_ylim([1e-7, 10]) + sax.set_xlabel(hists.axes[1].label) + if title is not None: ax.set_title(title, y=1.08) diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py index 3a6d4ea5..0380c685 100644 --- a/src/HHbbVV/postprocessing/postprocessing.py +++ b/src/HHbbVV/postprocessing/postprocessing.py @@ -49,7 +49,7 @@ jec_shifts, jmsr_shifts, ) -from utils import CUT_MAX_VAL +from utils import CUT_MAX_VAL, ShapeVar from pprint import pprint from copy import deepcopy @@ -62,36 +62,6 @@ # warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) -class ShapeVar: - """Class to store attributes of the variable to be fit on. - - Args: - var (str): variable name - label (str): variable label - bins (List[int]): bins - reg (bool, optional): Use a regular axis or variable binning. Defaults to True. - blind_window (List[int], optional): if blinding . Defaults to None. - """ - - def __init__( - self, - var: str, - label: str, - bins: List[int], - reg: bool = True, - blind_window: List[int] = None, - ): - self.var = var - self.label = label - self.blind_window = blind_window - - # create axis used for histogramming - if reg: - self.axis = hist.axis.Regular(*bins, name=var, label=label) - else: - self.axis = hist.axis.Variable(bins, name=var, label=label) - - @dataclass class Syst: samples: List[str] = None @@ -123,36 +93,39 @@ class Region: ] # {var: (bins, label)} -control_plot_vars = { - "MET_pt": ([40, 0, 320], r"$p^{miss}_T$ (GeV)"), - # "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"), - # "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"), - # "DijetMass": ([50, 500, 3000], r"$m^{jj}$ (GeV)"), - "bbFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{bb}$"), - "bbFatJetEta": ([40, -3, 3], r"$\eta^{bb}$"), - "bbFatJetPt": ([40, 300, 2300], r"$p^{bb}_T$ (GeV)"), # TODO: increase bin widths, x max - # "bbFatJetParticleNetMass": ([50, 0, 300], r"$m^{bb}_{reg}$ (GeV)"), - # "bbFatJetMsd": ([50, 0, 300], r"$m^{bb}_{msd}$ (GeV)"), - # "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$p^{bb}_{Txbb}$"), - "VVFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{VV}$"), - "VVFatJetEta": ([40, -3, 3], r"$\eta^{VV}$"), - "VVFatJetPt": ([40, 300, 2300], r"$p^{VV}_T$ (GeV)"), - # "VVFatJetParticleNetMass": ([50, 0, 300], r"$m^{VV}_{reg}$ (GeV)"), - # "VVFatJetMsd": ([50, 0, 300], r"$m^{VV}_{msd}$ (GeV)"), - # "VVFatJetParticleNet_Th4q": ([50, 0, 1], r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)"), - # "VVFatJetParTMD_THWW4q": ( - # [50, 0, 1], - # r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)", - # ), - # "VVFatJetParTMD_probT": ([50, 0, 1], r"Prob(Top) (Mass-Decorrelated)"), - # "bbFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{bb}_T / p_T^{jj}$"), - # "VVFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{VV}_T / p_T^{jj}$"), - # "VVFatJetPtOverbbFatJetPt": ([50, 0.4, 2.0], r"$p^{VV}_T / p^{bb}_T$"), - # "nGoodMuons": ([3, 0, 3], r"# of Muons"), - # "nGoodElectrons": ([3, 0, 3], r"# of Electrons"), - # "nGoodJets": ([5, 0, 5], r"# of AK4 B-Jets"), - # "BDTScore": ([50, 0, 1], r"BDT Score"), -} +control_plot_vars = [ + ShapeVar(var="MET_pt", label=r"$p^{miss}_T$ (GeV)", bins=[50, 0, 300]), + ShapeVar(var="DijetEta", label=r"$\eta^{jj}$", bins=[30, -8, 8]), + ShapeVar(var="DijetPt", label=r"$p_T^{jj}$ (GeV)", bins=[30, 0, 750]), + ShapeVar(var="DijetMass", label=r"$m^{jj}$ (GeV)", bins=[30, 600, 4000]), + ShapeVar(var="bbFatJetEta", label=r"$\eta^{bb}$", bins=[30, -2.4, 2.4]), + ShapeVar( + var="bbFatJetPt", label=r"$p^{bb}_T$ (GeV)", bins=[30, 300, 1500], significance_dir="right" + ), + ShapeVar( + var="bbFatJetParticleNetMass", + label=r"$m^{bb}_{reg}$ (GeV)", + bins=[20, 50, 250], + significance_dir="bin", + ), + ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[50, 0, 300]), + ShapeVar(var="bbFatJetParticleNetMD_Txbb", label=r"$T^{bb}_{Xbb}$", bins=[50, 0.8, 1]), + ShapeVar(var="VVFatJetEta", label=r"$\eta^{VV}$", bins=[30, -2.4, 2.4]), + ShapeVar(var="VVFatJetPt", label=r"$p^{VV}_T$ (GeV)", bins=[30, 300, 1500]), + ShapeVar(var="VVParticleNetMass", label=r"$m^{VV}_{reg}$ (GeV)", bins=[20, 50, 250]), + ShapeVar(var="VVFatJetMsd", label=r"$m^{VV}_{msd}$ (GeV)", bins=[40, 50, 250]), + # ShapeVar(var="VVFatJetParticleNet_Th4q", label=r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)", bins=[50, 0, 1]), + # ShapeVar(var="VVFatJetParTMD_THWW4q", label=r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)", bins=[50, 0, 1]), + # ShapeVar(var="VVFatJetParTMD_probT", label=r"Prob(Top) (Mass-Decorrelated)", bins=[50, 0, 1]), + ShapeVar(var="VVFatJetParTMD_THWWvsT", label=r"$T^{VV}_{HWW}$", bins=[50, 0, 1]), + ShapeVar(var="bbFatJetPtOverDijetPt", label=r"$p^{bb}_T / p_T^{jj}$", bins=[50, 0, 40]), + ShapeVar(var="VVFatJetPtOverDijetPt", label=r"$p^{VV}_T / p_T^{jj}$", bins=[50, 0, 40]), + ShapeVar(var="VVFatJetPtOverbbFatJetPt", label=r"$p^{VV}_T / p^{bb}_T$", bins=[50, 0.4, 2.0]), + # ShapeVar(var="nGoodMuons", label=r"# of Muons", bins=[3, 0, 3]), + # ShapeVar(var="nGoodElectrons", label=r"# of Electrons", bins=[3, 0, 3]), + # ShapeVar(var="nGoodJets", label=r"# of AK4 B-Jets", bins=[5, 0, 5]), + # ShapeVar(var="BDTScore", label=r"BDT Score", bins=[50, 0, 1]), +] def get_nonres_selection_regions( @@ -1002,7 +975,7 @@ def control_plots( events_dict: Dict[str, pd.DataFrame], bb_masks: Dict[str, pd.DataFrame], sig_keys: List[str], - control_plot_vars: Dict[str, Tuple], + control_plot_vars: List[ShapeVar], plot_dir: str, year: str, weight_key: str = "finalWeight", @@ -1014,6 +987,7 @@ def control_plots( sig_scale_dict: Dict[str, float] = None, combine_pdf: bool = True, HEM2d: bool = False, + plot_significance: bool = False, show: bool = False, log: Tuple[bool, str] = "both", ): @@ -1035,17 +1009,14 @@ def control_plots( if sig_scale_dict is None: sig_scale_dict = {sig_key: 2e5 for sig_key in sig_keys} - # sig_scale_dict["HHbbVV"] = 2e5 - - # print(f"{sig_scale_dict = }") print(control_plot_vars) print(selection) - for var, (bins, label) in control_plot_vars.items(): - if var not in hists: - hists[var] = utils.singleVarHist( - events_dict, var, bins, label, bb_masks, weight_key=weight_key, selection=selection + for shape_var in control_plot_vars: + if shape_var.var not in hists: + hists[shape_var.var] = utils.singleVarHist( + events_dict, shape_var, bb_masks, weight_key=weight_key, selection=selection ) if HEM2d and year == "2018": @@ -1069,15 +1040,17 @@ def control_plots( merger_control_plots = PdfMerger() - for var, var_hist in hists.items(): - name = f"{tplot_dir}/{cutstr}{var}{logstr}.pdf" + for shape_var in control_plot_vars: + name = f"{tplot_dir}/{cutstr}{shape_var.var}{logstr}.pdf" plotting.ratioHistPlot( - var_hist, + hists[shape_var.var], year, plot_sig_keys, bg_keys, name=name, sig_scale_dict=tsig_scale_dict if not log else None, + plot_significance=plot_significance, + significance_dir=shape_var.significance_dir, show=show, log=log, ylim=None if not log else 1e15, diff --git a/src/HHbbVV/postprocessing/utils.py b/src/HHbbVV/postprocessing/utils.py index 4e2c98a6..81c67ff1 100644 --- a/src/HHbbVV/postprocessing/utils.py +++ b/src/HHbbVV/postprocessing/utils.py @@ -4,6 +4,7 @@ Author: Raghav Kansal """ +from dataclasses import dataclass import time import contextlib from os import listdir @@ -33,6 +34,35 @@ CUT_MAX_VAL = 9999.0 +@dataclass +class ShapeVar: + """Class to store attributes of a variable to make a histogram of. + + Args: + var (str): variable name + label (str): variable label + bins (List[int]): bins + reg (bool, optional): Use a regular axis or variable binning. Defaults to True. + blind_window (List[int], optional): if blinding, set min and max values to set 0. Defaults to None. + significance_dir (str, optional): if plotting significance, which direction to plot it in. + See more in plotting.py:ratioHistPlot(). Options are ["left", "right", "bin"]. Defaults to "right". + """ + + var: str = None + label: str = None + bins: List[int] = None + reg: bool = True + blind_window: List[int] = None + significance_dir: str = "right" + + def __post_init__(self): + # create axis used for histogramming + if self.reg: + self.axis = hist.axis.Regular(*self.bins, name=self.var, label=self.label) + else: + self.axis = hist.axis.Variable(self.bins, name=self.var, label=self.label) + + def add_bool_arg(parser, name, help, default=False, no_name=None): """Add a boolean command line argument for argparse""" varname = "_".join(name.split("-")) # change hyphens to underscores @@ -350,12 +380,9 @@ def blindBins(h: Hist, blind_region: List, blind_sample: str = None, axis=0): def singleVarHist( events_dict: Dict[str, pd.DataFrame], - var: str, - bins: list, - label: str, + shape_var: ShapeVar, bb_masks: Dict[str, pd.DataFrame], weight_key: str = "finalWeight", - blind_region: List = None, selection: Dict = None, ) -> Hist: """ @@ -364,9 +391,7 @@ def singleVarHist( Args: events (dict): a dict of events of format {sample1: {var1: np.array, var2: np.array, ...}, sample2: ...} - var (str): variable inside the events dict to make a histogram of - bins (list): bins in Hist format i.e. [num_bins, min_value, max_value] - label (str): label for variable (shows up when plotting) + shape_var (ShapeVar): ShapeVar object specifying the variable, label, binning, and (optionally) a blinding window. weight_key (str, optional): which weight to use from events, if different from 'weight' blind_region (list, optional): region to blind for data, in format [low_cut, high_cut]. Bins in this region will be set to 0 for data. @@ -375,10 +400,13 @@ def singleVarHist( """ samples = list(events_dict.keys()) - if len(bins) == 3: - h = Hist.new.StrCat(samples, name="Sample").Reg(*bins, name=var, label=label).Weight() - else: - h = Hist.new.StrCat(samples, name="Sample").Var(bins, name=var, label=label).Weight() + h = Hist( + hist.axis.StrCategory(samples, name="Sample"), + shape_var.axis, + storage="weight", + ) + + var = shape_var.var for sample in samples: events = events_dict[sample] @@ -398,8 +426,8 @@ def singleVarHist( if len(fill_data[var]): h.fill(Sample=sample, **fill_data, weight=weight) - if blind_region is not None: - blindBins(h, blind_region, data_key) + if shape_var.blind_window is not None: + blindBins(h, shape_var.blind_window, data_key) return h