Lepton ID weights #45

Merged 4 commits on Feb 23, 2024
22 changes: 11 additions & 11 deletions src/HHbbVV/bash/run_local.sh
@@ -4,7 +4,11 @@ mkdir -p tmp/test_outputs/

year=2017
extraargs=""
# extraargs="--no-inference"
extraargs="--no-inference"

python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl

python -W ignore src/run.py --processor skimmer --year $year --samples HH --subsamples GluGluToHHTobbVV_node_cHHH1 --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/hhbbvv.parquet
@@ -14,14 +18,10 @@ python -W ignore src/run.py --processor skimmer --year $year --samples XHY --sub
mv "0-1.parquet" tmp/test_outputs/xhy.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/xhy.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/qcd.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl
python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/qcd.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/data.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/data.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
68 changes: 52 additions & 16 deletions src/HHbbVV/processors/bbVVSkimmer.py
@@ -35,6 +35,7 @@
get_jec_jets,
get_jmsr,
get_lund_SFs,
add_lepton_id_weights,
)
from .common import LUMI, HLTs, btagWPs, jec_shifts, jmsr_shifts
from . import common
@@ -544,20 +545,19 @@ def process(self, events: ak.Array):
# https://indico.cern.ch/event/1154430/#b-471403-higgs-meeting-special

goodelectronHbb = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.cutBased >= events.Electron.LOOSE)
(electrons.pt > 20)
& (abs(electrons.eta) < 2.5)
& (electrons.miniPFRelIso_all < 0.4)
& (electrons.cutBased >= electrons.LOOSE)
)
nelectronsHbb = ak.sum(goodelectronHbb, axis=1)
goodelectronsHbb = electrons[goodelectronHbb]

goodmuonHbb = (
(events.Muon.pt > 10)
& (abs(events.Muon.eta) < 2.4)
& (events.Muon.pfRelIso04_all < 0.25)
& events.Muon.looseId
(muons.pt > 10) & (abs(muons.eta) < 2.4) & (muons.pfRelIso04_all < 0.25) & muons.looseId
)
nmuonsHbb = ak.sum(goodmuonHbb, axis=1)
goodmuonsHbb = muons[goodmuonHbb]

# HH4b lepton vetoes:
# https://cms.cern.ch/iCMS/user/noteinfo?cmsnoteid=CMS%20AN-2020/231 Section 7.1.2
@@ -566,20 +566,22 @@ def process(self, events: ak.Array):
# (miniPFRelIso all < 0.4). An electron is required to pass mvaFall17V2noIso WP90 identification as well as mini-isolation (miniPFRelIso all < 0.4).

goodelectronHH = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.mvaFall17V2noIso_WP90)
(electrons.pt > 20)
& (abs(electrons.eta) < 2.5)
& (electrons.miniPFRelIso_all < 0.4)
& (electrons.mvaFall17V2noIso_WP90)
)
nelectronsHH = ak.sum(goodelectronHH, axis=1)
goodelectronsHH = electrons[goodelectronHH]

goodmuonHH = (
(events.Muon.pt > 15)
& (abs(events.Muon.eta) < 2.4)
& (events.Muon.miniPFRelIso_all < 0.4)
& events.Muon.looseId
(muons.pt > 15)
& (abs(muons.eta) < 2.4)
& (muons.miniPFRelIso_all < 0.4)
& muons.looseId
)
nmuonsHH = ak.sum(goodmuonHH, axis=1)
goodmuonsHH = muons[goodmuonHH]

skimmed_events["nGoodElectronsHbb"] = nelectronsHbb.to_numpy()
skimmed_events["nGoodElectronsHH"] = nelectronsHH.to_numpy()
@@ -697,6 +699,40 @@ def process(self, events: ak.Array):
# to check in postprocessing for xsec & lumi normalisation
skimmed_events["weight_noxsec"] = weight

# separate lepton ID weights for now; TODO: incorporate into the main event weights above if the lepton vetoes turn out to be useful
lepton_weights = Weights(len(events), storeIndividual=True)
add_lepton_id_weights(
lepton_weights, year, goodelectronsHbb, "electron", "Loose", label="_hbb"
)
add_lepton_id_weights(
lepton_weights, year, goodelectronsHH, "electron", "wp90noiso", label="_hh"
)
add_lepton_id_weights(lepton_weights, year, goodmuonsHbb, "muon", "Loose", label="_hbb")
add_lepton_id_weights(lepton_weights, year, goodmuonsHH, "muon", "Loose", label="_hh")
print("lepton weights")
print(lepton_weights)

lepton_weights_dict = {
f"single_weight_{key}": val
for key, val in list(lepton_weights._weights.items())
+ list(lepton_weights._modifiers.items())
}

print(lepton_weights_dict)
skimmed_events = {**skimmed_events, **lepton_weights_dict}

# for systematic in lepton_weights._weights:
# var_name = f"single_weight_{systematic}"
# skimmed_events[var_name] = lepton_weights.partial_weight([var_name])
# print(var_name)
# print(skimmed_events[var_name])

# for systematic in lepton_weights._modifiers:
# var_name = f"single_weight_{systematic}"
# skimmed_events[var_name] = lepton_weights._modifiers[var_name]
# print(var_name)
# print(skimmed_events[var_name])

# reshape and apply selections
sel_all = selection.all(*selection.names)

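For reference, a minimal standalone sketch of the columns the dict comprehension above produces, assuming coffea's `Weights` with `storeIndividual=True` keeps the nominal arrays in `_weights` and the Up/Down variations in `_modifiers` (keyed as `<name>Up` / `<name>Down`); the toy scale-factor values are illustrative only, not taken from this PR:

```python
import numpy as np
from coffea.analysis_tools import Weights

n_events = 5
ones = np.ones(n_events)

# mirror the four add_lepton_id_weights calls in the skimmer:
# weight names follow f"{lepton_type}{label}_id_{wp}", e.g. "electron_hbb_id_Loose"
lepton_weights = Weights(n_events, storeIndividual=True)
lepton_weights.add("electron_hbb_id_Loose", ones, ones * 1.02, ones * 0.98)
lepton_weights.add("electron_hh_id_wp90noiso", ones, ones * 1.03, ones * 0.97)
lepton_weights.add("muon_hbb_id_Loose", ones, ones * 1.01, ones * 0.99)
lepton_weights.add("muon_hh_id_Loose", ones, ones * 1.01, ones * 0.99)

lepton_weights_dict = {
    f"single_weight_{key}": val
    for key, val in list(lepton_weights._weights.items())
    + list(lepton_weights._modifiers.items())
}

# expected keys: single_weight_electron_hbb_id_Loose, ..., plus the Up/Down
# modifiers such as single_weight_electron_hbb_id_LooseUp
print(sorted(lepton_weights_dict))
```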
58 changes: 56 additions & 2 deletions src/HHbbVV/processors/corrections.py
@@ -29,7 +29,7 @@
import pathlib

from . import utils
from .utils import P4, pad_val
from .utils import P4, pad_val, PAD_VAL


package_path = str(pathlib.Path(__file__).parent.parent.resolve())
@@ -41,7 +41,7 @@
"""
pog_correction_path = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/"
pog_jsons = {
"muon": ["MUO", "muon_Z.json.gz"],
"muon": ["MUO", "muon_Z_v2.json.gz"],
"electron": ["EGM", "electron.json.gz"],
"pileup": ["LUM", "puWeights.json.gz"],
"btagging": ["BTV", "btagging.json.gz"],
@@ -432,6 +432,7 @@ def _get_lepton_clipped(lep_pt, lep_eta, lepton_type, corr=None):
return lepton_pt, lepton_eta


# Used only for validation region right now
def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
ul_year = get_UL_year(year)

@@ -455,6 +456,59 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
weights.add(f"{lepton_type}_{corr}", values["nominal"], values["up"], values["down"])


# For analysis region
def add_lepton_id_weights(
weights: Weights,
year: str,
lepton: NanoEventsArray,
lepton_type: str,
wp: str,
label: str = "",
max_num_leptons: int = 3,
):
year = get_vfp_year(year)
ul_year = get_UL_year(year)

cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year))

lep_exists = ak.count(lepton.pt, axis=1) > 0
lep_pt = pad_val(lepton.pt, max_num_leptons, axis=1)
lep_eta = pad_val(lepton.eta, max_num_leptons, axis=1)

# clip pt and eta into the ranges covered by the corrections (helper from Cristina)
lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type)
values = {}

if lepton_type == "electron":
# https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html
cset_map = cset["UL-Electron-ID-SF"]

values["nominal"] = cset_map.evaluate(year, "sf", wp, lepton_eta, lepton_pt)
values["up"] = cset_map.evaluate(year, "sfup", wp, lepton_eta, lepton_pt)
values["down"] = cset_map.evaluate(year, "sfdown", wp, lepton_eta, lepton_pt)
else:
# https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/MUO_2018_UL_muon_Z_v2.html
cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"]

values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf")
values["up"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systup")
values["down"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systdown")

for key, value in values.items():
# efficiency for at least one lepton passing is 1 - (1 - eff1) * (1 - eff2) * ...
value[lepton_pt == PAD_VAL] = 0 # if lep didn't exist, ignore efficiency
val = 1 - np.prod(1 - value, axis=1)
val[~lep_exists] = 1 # if no leps in event, SF = 1
values[key] = np.nan_to_num(val, nan=1)

print(lepton_type, label)
print(values)

# add weights (for now only the nominal weight)
weights.add(f"{lepton_type}{label}_id_{wp}", values["nominal"], values["up"], values["down"])
# breakpoint()


TOP_PDGID = 6
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]
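To make the padding and combination logic in add_lepton_id_weights easier to check, here is a small numpy-only sketch with toy per-lepton scale factors; the PAD_VAL value below is assumed for illustration (in the analysis it is imported from processors.utils), and only its role as a padding sentinel matters:

```python
import numpy as np

PAD_VAL = -99999  # assumed placeholder standing in for the sentinel from .utils

# toy per-lepton SFs for three events, padded to max_num_leptons = 3
sfs = np.array([
    [0.95, 0.90, PAD_VAL],        # two reconstructed leptons
    [0.98, PAD_VAL, PAD_VAL],     # one lepton
    [PAD_VAL, PAD_VAL, PAD_VAL],  # no leptons
], dtype=float)
lep_pt = sfs.copy()                       # stand-in for the padded lepton pt used as the mask
lep_exists = np.array([True, True, False])

value = sfs.copy()
value[lep_pt == PAD_VAL] = 0              # padded slots contribute a factor of (1 - 0) = 1
val = 1 - np.prod(1 - value, axis=1)      # "at least one lepton passes" combination
val[~lep_exists] = 1                      # events with no leptons get SF = 1
val = np.nan_to_num(val, nan=1)

print(val)  # -> [0.995 0.98  1.  ]
```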
