From bbb8109f54145d4f6bad080f0b07d2fe6781c654 Mon Sep 17 00:00:00 2001
From: rkansal47 <raghavk1997@yahoo.co.in>
Date: Tue, 13 Feb 2024 14:47:44 -0600
Subject: [PATCH 1/3] lepton id weights

---
 src/HHbbVV/processors/bbVVSkimmer.py | 11 ++++++++
 src/HHbbVV/processors/corrections.py | 39 ++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py
index 6df3f297..0524f279 100644
--- a/src/HHbbVV/processors/bbVVSkimmer.py
+++ b/src/HHbbVV/processors/bbVVSkimmer.py
@@ -35,6 +35,7 @@
     get_jec_jets,
     get_jmsr,
     get_lund_SFs,
+    add_lepton_id_weights,
 )
 from .common import LUMI, HLTs, btagWPs, jec_shifts, jmsr_shifts
 from . import common
@@ -668,6 +669,16 @@ def process(self, events: ak.Array):
                     # to check in postprocessing for xsec & lumi normalisation
                     skimmed_events["weight_noxsec"] = weight
 
+            # separate lepton ID weights for now TODO: incorporate above if lepton vetoes are useful
+            lepton_weights = Weights(len(events), storeIndividual=True)
+            add_lepton_id_weights(lepton_weights, year, goodelectronHbb, "electron", "Loose")
+            add_lepton_id_weights(lepton_weights, year, goodelectronHH, "electron", "wp90noiso")
+            add_lepton_id_weights(lepton_weights, year, goodmuon, "muon", "Loose")
+
+            for systematic in lepton_weights.variations:
+                var_name = f"single_weight_{systematic}"
+                skimmed_events[var_name] = lepton_weights.partial_weight([var_name])
+
         # reshape and apply selections
         sel_all = selection.all(*selection.names)
 
diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py
index ed2cf91e..667b553a 100644
--- a/src/HHbbVV/processors/corrections.py
+++ b/src/HHbbVV/processors/corrections.py
@@ -432,6 +432,7 @@ def _get_lepton_clipped(lep_pt, lep_eta, lepton_type, corr=None):
     return lepton_pt, lepton_eta
 
 
+# Used only for validation region right now
 def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
     ul_year = get_UL_year(year)
 
@@ -455,6 +456,44 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_ty
         weights.add(f"{lepton_type}_{corr}", values["nominal"], values["up"], values["down"])
 
 
+# For analysis region
+def add_lepton_id_weights(
+    weights: Weights, year: str, lepton: NanoEventsArray, lepton_type: str, wp: str
+):
+    ul_year = get_UL_year(year)
+
+    cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year))
+
+    lep_pt = np.array(ak.fill_none(lepton.pt, 0.0))
+    lep_eta = np.abs(np.array(ak.fill_none(lepton.eta, 0.0)))
+
+    # some voodoo from cristina
+    lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type)
+    values = {}
+
+    if lepton_type == "electron":
+        # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html
+        cset_map = cset["UL-Electron-ID-SF"]
+
+        values["nominal"] = cset_map.evaluate(ul_year, "sf", wp, lepton_eta, lepton_pt)
+        values["up"] = cset_map.evaluate(ul_year, "systup", wp, lepton_eta, lepton_pt)
+        values["down"] = cset_map.evaluate(ul_year, "systdown", wp, lepton_eta, lepton_pt)
+    else:
+        cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"]
+
+        values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf")
+        values["up"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systup")
+        values["down"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systdown")
+
+    for key, value in values.items():
+        # efficiency for a single lepton passing is 1 - (1 - eff1) * (1 - eff2) * ...
+        val = 1 - np.prod(1 - value, axis=1)
+        values[key] = np.nan_to_num(val, nan=1)
+
+    # add weights (for now only the nominal weight)
+    weights.add(f"{lepton_type}_id_{wp}", values["nominal"], values["up"], values["down"])
+
+
 TOP_PDGID = 6
 GEN_FLAGS = ["fromHardProcess", "isLastCopy"]
 

From eb78c58d308efc68de0612eaf92a18b14c373d6d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 13 Feb 2024 20:49:48 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/HHbbVV/postprocessing/InferenceAnalysis.ipynb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
index 319a1d6d..16d7069a 100644
--- a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
+++ b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
@@ -108,9 +108,9 @@
     "]\n",
     "\n",
     "for mX, mY in res_mps:\n",
-    "    res_samples[\n",
-    "        f\"X[{mX}]->H(bb)Y[{mY}](WW)\"\n",
-    "    ] = f\"NMSSM_XToYH_MX{mX}_MY{mY}_HTo2bYTo2W_hadronicDecay\"\n",
+    "    res_samples[f\"X[{mX}]->H(bb)Y[{mY}](WW)\"] = (\n",
+    "        f\"NMSSM_XToYH_MX{mX}_MY{mY}_HTo2bYTo2W_hadronicDecay\"\n",
+    "    )\n",
     "\n",
     "res_sig_keys = list(res_samples.keys())\n",
     "nonres_sig_keys = [\"HHbbVV\"]"
@@ -310,6 +310,7 @@
     "    sample2...\n",
     "}\n",
     "\"\"\"\n",
+    "\n",
     "pt_key = \"Pt\"\n",
     "msd_key = \"Msd\"\n",
     "var_prefix = \"ak8FatJet\"\n",

From 667c598ef13f37d1e8d275bd61636a81bd531470 Mon Sep 17 00:00:00 2001
From: rkansal47 <rkansal@cern.ch>
Date: Fri, 23 Feb 2024 11:41:45 -0600
Subject: [PATCH 3/3] bug fixes

---
 src/HHbbVV/bash/run_local.sh         | 22 ++++-----
 src/HHbbVV/processors/bbVVSkimmer.py | 71 +++++++++++++++++++---------
 src/HHbbVV/processors/corrections.py | 33 +++++++++----
 3 files changed, 83 insertions(+), 43 deletions(-)

diff --git a/src/HHbbVV/bash/run_local.sh b/src/HHbbVV/bash/run_local.sh
index c3e3ac42..d10bb200 100755
--- a/src/HHbbVV/bash/run_local.sh
+++ b/src/HHbbVV/bash/run_local.sh
@@ -4,7 +4,11 @@ mkdir -p tmp/test_outputs/
 
 year=2017
 extraargs=""
-# extraargs="--no-inference"
+extraargs="--no-inference"
+
+python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
+mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
+mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl
 
 python -W ignore src/run.py --processor skimmer --year $year --samples HH --subsamples GluGluToHHTobbVV_node_cHHH1 --save-systematics --starti 0 --endi 1 $extraargs
 mv "0-1.parquet" tmp/test_outputs/hhbbvv.parquet
@@ -14,14 +18,10 @@ python -W ignore src/run.py --processor skimmer --year $year --samples XHY --sub
 mv "0-1.parquet" tmp/test_outputs/xhy.parquet
 mv "outfiles/0-1.pkl" tmp/test_outputs/xhy.pkl
 
-# python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
-# mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
-# mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl
-
-# python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
-# mv "0-1.parquet" tmp/test_outputs/qcd.parquet
-# mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl
+python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
+mv "0-1.parquet" tmp/test_outputs/qcd.parquet
+mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl
 
-# python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
-# mv "0-1.parquet" tmp/test_outputs/data.parquet
-# mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
\ No newline at end of file
+python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
+mv "0-1.parquet" tmp/test_outputs/data.parquet
+mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
\ No newline at end of file
diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py
index f7df3297..d51aebc3 100644
--- a/src/HHbbVV/processors/bbVVSkimmer.py
+++ b/src/HHbbVV/processors/bbVVSkimmer.py
@@ -545,20 +545,19 @@ def process(self, events: ak.Array):
         # https://indico.cern.ch/event/1154430/#b-471403-higgs-meeting-special
 
         goodelectronHbb = (
-            (events.Electron.pt > 20)
-            & (abs(events.Electron.eta) < 2.5)
-            & (events.Electron.miniPFRelIso_all < 0.4)
-            & (events.Electron.cutBased >= events.Electron.LOOSE)
+            (electrons.pt > 20)
+            & (abs(electrons.eta) < 2.5)
+            & (electrons.miniPFRelIso_all < 0.4)
+            & (electrons.cutBased >= electrons.LOOSE)
         )
         nelectronsHbb = ak.sum(goodelectronHbb, axis=1)
+        goodelectronsHbb = electrons[goodelectronHbb]
 
         goodmuonHbb = (
-            (events.Muon.pt > 10)
-            & (abs(events.Muon.eta) < 2.4)
-            & (events.Muon.pfRelIso04_all < 0.25)
-            & events.Muon.looseId
+            (muons.pt > 10) & (abs(muons.eta) < 2.4) & (muons.pfRelIso04_all < 0.25) & muons.looseId
         )
         nmuonsHbb = ak.sum(goodmuonHbb, axis=1)
+        goodmuonsHbb = muons[goodmuonHbb]
 
         # HH4b lepton vetoes:
         # https://cms.cern.ch/iCMS/user/noteinfo?cmsnoteid=CMS%20AN-2020/231 Section 7.1.2
@@ -567,20 +566,22 @@ def process(self, events: ak.Array):
         # (miniPFRelIso all < 0.4). An electron is required to pass mvaFall17V2noIso WP90 identification as well as mini-isolation (miniPFRelIso all < 0.4).
 
         goodelectronHH = (
-            (events.Electron.pt > 20)
-            & (abs(events.Electron.eta) < 2.5)
-            & (events.Electron.miniPFRelIso_all < 0.4)
-            & (events.Electron.mvaFall17V2noIso_WP90)
+            (electrons.pt > 20)
+            & (abs(electrons.eta) < 2.5)
+            & (electrons.miniPFRelIso_all < 0.4)
+            & (electrons.mvaFall17V2noIso_WP90)
         )
         nelectronsHH = ak.sum(goodelectronHH, axis=1)
+        goodelectronsHH = electrons[goodelectronHH]
 
         goodmuonHH = (
-            (events.Muon.pt > 15)
-            & (abs(events.Muon.eta) < 2.4)
-            & (events.Muon.miniPFRelIso_all < 0.4)
-            & events.Muon.looseId
+            (muons.pt > 15)
+            & (abs(muons.eta) < 2.4)
+            & (muons.miniPFRelIso_all < 0.4)
+            & muons.looseId
         )
         nmuonsHH = ak.sum(goodmuonHH, axis=1)
+        goodmuonsHH = muons[goodmuonHH]
 
         skimmed_events["nGoodElectronsHbb"] = nelectronsHbb.to_numpy()
         skimmed_events["nGoodElectronsHH"] = nelectronsHH.to_numpy()
@@ -698,15 +699,39 @@ def process(self, events: ak.Array):
                     # to check in postprocessing for xsec & lumi normalisation
                     skimmed_events["weight_noxsec"] = weight
 
-            # separate lepton ID weights for now TODO: incorporate above if lepton vetoes are useful
+            # separate lepton ID weights for now TODO: incorporate above next time if lepton vetoes are useful
             lepton_weights = Weights(len(events), storeIndividual=True)
-            add_lepton_id_weights(lepton_weights, year, goodelectronHbb, "electron", "Loose")
-            add_lepton_id_weights(lepton_weights, year, goodelectronHH, "electron", "wp90noiso")
-            add_lepton_id_weights(lepton_weights, year, goodmuon, "muon", "Loose")
+            add_lepton_id_weights(
+                lepton_weights, year, goodelectronsHbb, "electron", "Loose", label="_hbb"
+            )
+            add_lepton_id_weights(
+                lepton_weights, year, goodelectronsHH, "electron", "wp90noiso", label="_hh"
+            )
+            add_lepton_id_weights(lepton_weights, year, goodmuonsHbb, "muon", "Loose", label="_hbb")
+            add_lepton_id_weights(lepton_weights, year, goodmuonsHH, "muon", "Loose", label="_hh")
+            print("lepton weights")
+            print(lepton_weights)
+
+            lepton_weights_dict = {
+                f"single_weight_{key}": val
+                for key, val in list(lepton_weights._weights.items())
+                + list(lepton_weights._modifiers.items())
+            }
+
+            print(lepton_weights_dict)
+            skimmed_events = {**skimmed_events, **lepton_weights_dict}
+
+            # for systematic in lepton_weights._weights:
+            #     var_name = f"single_weight_{systematic}"
+            #     skimmed_events[var_name] = lepton_weights.partial_weight([var_name])
+            #     print(var_name)
+            #     print(skimmed_events[var_name])
 
-            for systematic in lepton_weights.variations:
-                var_name = f"single_weight_{systematic}"
-                skimmed_events[var_name] = lepton_weights.partial_weight([var_name])
+            # for systematic in lepton_weights._modifiers:
+            #     var_name = f"single_weight_{systematic}"
+            #     skimmed_events[var_name] = lepton_weights._modifiers[var_name]
+            #     print(var_name)
+            # print(skimmed_events[var_name])
 
         # reshape and apply selections
         sel_all = selection.all(*selection.names)
diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py
index 667b553a..c9b0f56e 100644
--- a/src/HHbbVV/processors/corrections.py
+++ b/src/HHbbVV/processors/corrections.py
@@ -29,7 +29,7 @@
 import pathlib
 
 from . import utils
-from .utils import P4, pad_val
+from .utils import P4, pad_val, PAD_VAL
 
 
 package_path = str(pathlib.Path(__file__).parent.parent.resolve())
@@ -41,7 +41,7 @@
 """
 pog_correction_path = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/"
 pog_jsons = {
-    "muon": ["MUO", "muon_Z.json.gz"],
+    "muon": ["MUO", "muon_Z_v2.json.gz"],
     "electron": ["EGM", "electron.json.gz"],
     "pileup": ["LUM", "puWeights.json.gz"],
     "btagging": ["BTV", "btagging.json.gz"],
@@ -458,14 +458,22 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_ty
 
 # For analysis region
 def add_lepton_id_weights(
-    weights: Weights, year: str, lepton: NanoEventsArray, lepton_type: str, wp: str
+    weights: Weights,
+    year: str,
+    lepton: NanoEventsArray,
+    lepton_type: str,
+    wp: str,
+    label: str = "",
+    max_num_leptons: int = 3,
 ):
+    year = get_vfp_year(year)
     ul_year = get_UL_year(year)
 
     cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year))
 
-    lep_pt = np.array(ak.fill_none(lepton.pt, 0.0))
-    lep_eta = np.abs(np.array(ak.fill_none(lepton.eta, 0.0)))
+    lep_exists = ak.count(lepton.pt, axis=1) > 0
+    lep_pt = pad_val(lepton.pt, max_num_leptons, axis=1)
+    lep_eta = pad_val(lepton.eta, max_num_leptons, axis=1)
 
     # some voodoo from cristina
     lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type)
@@ -475,10 +483,11 @@ def add_lepton_id_weights(
         # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html
         cset_map = cset["UL-Electron-ID-SF"]
 
-        values["nominal"] = cset_map.evaluate(ul_year, "sf", wp, lepton_eta, lepton_pt)
-        values["up"] = cset_map.evaluate(ul_year, "systup", wp, lepton_eta, lepton_pt)
-        values["down"] = cset_map.evaluate(ul_year, "systdown", wp, lepton_eta, lepton_pt)
+        values["nominal"] = cset_map.evaluate(year, "sf", wp, lepton_eta, lepton_pt)
+        values["up"] = cset_map.evaluate(year, "sfup", wp, lepton_eta, lepton_pt)
+        values["down"] = cset_map.evaluate(year, "sfdown", wp, lepton_eta, lepton_pt)
     else:
+        # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/MUO_2018_UL_muon_Z_v2.html
         cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"]
 
         values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf")
@@ -487,11 +496,17 @@ def add_lepton_id_weights(
 
     for key, value in values.items():
         # efficiency for a single lepton passing is 1 - (1 - eff1) * (1 - eff2) * ...
+        value[lepton_pt == PAD_VAL] = 0  # if lep didn't exist, ignore efficiency
         val = 1 - np.prod(1 - value, axis=1)
+        val[~lep_exists] = 1  # if no leps in event, SF = 1
         values[key] = np.nan_to_num(val, nan=1)
 
+    print(lepton_type, label)
+    print(values)
+
     # add weights (for now only the nominal weight)
-    weights.add(f"{lepton_type}_id_{wp}", values["nominal"], values["up"], values["down"])
+    weights.add(f"{lepton_type}{label}_id_{wp}", values["nominal"], values["up"], values["down"])
+    # breakpoint()
 
 
 TOP_PDGID = 6