From bbb8109f54145d4f6bad080f0b07d2fe6781c654 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Tue, 13 Feb 2024 14:47:44 -0600 Subject: [PATCH 1/3] lepton id weights --- src/HHbbVV/processors/bbVVSkimmer.py | 11 ++++++++ src/HHbbVV/processors/corrections.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py index 6df3f297..0524f279 100644 --- a/src/HHbbVV/processors/bbVVSkimmer.py +++ b/src/HHbbVV/processors/bbVVSkimmer.py @@ -35,6 +35,7 @@ get_jec_jets, get_jmsr, get_lund_SFs, + add_lepton_id_weights, ) from .common import LUMI, HLTs, btagWPs, jec_shifts, jmsr_shifts from . import common @@ -668,6 +669,16 @@ def process(self, events: ak.Array): # to check in postprocessing for xsec & lumi normalisation skimmed_events["weight_noxsec"] = weight + # separate lepton ID weights for now TODO: incorporate above if lepton vetoes are useful + lepton_weights = Weights(len(events), storeIndividual=True) + add_lepton_id_weights(lepton_weights, year, goodelectronHbb, "electron", "Loose") + add_lepton_id_weights(lepton_weights, year, goodelectronHH, "electron", "wp90noiso") + add_lepton_id_weights(lepton_weights, year, goodmuon, "muon", "Loose") + + for systematic in lepton_weights.variations: + var_name = f"single_weight_{systematic}" + skimmed_events[var_name] = lepton_weights.partial_weight([var_name]) + # reshape and apply selections sel_all = selection.all(*selection.names) diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py index ed2cf91e..667b553a 100644 --- a/src/HHbbVV/processors/corrections.py +++ b/src/HHbbVV/processors/corrections.py @@ -432,6 +432,7 @@ def _get_lepton_clipped(lep_pt, lep_eta, lepton_type, corr=None): return lepton_pt, lepton_eta +# Used only for validation region right now def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"): ul_year = get_UL_year(year) @@ -455,6 +456,44 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_ty weights.add(f"{lepton_type}_{corr}", values["nominal"], values["up"], values["down"]) +# For analysis region +def add_lepton_id_weights( + weights: Weights, year: str, lepton: NanoEventsArray, lepton_type: str, wp: str +): + ul_year = get_UL_year(year) + + cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year)) + + lep_pt = np.array(ak.fill_none(lepton.pt, 0.0)) + lep_eta = np.abs(np.array(ak.fill_none(lepton.eta, 0.0))) + + # some voodoo from cristina + lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type) + values = {} + + if lepton_type == "electron": + # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html + cset_map = cset["UL-Electron-ID-SF"] + + values["nominal"] = cset_map.evaluate(ul_year, "sf", wp, lepton_eta, lepton_pt) + values["up"] = cset_map.evaluate(ul_year, "systup", wp, lepton_eta, lepton_pt) + values["down"] = cset_map.evaluate(ul_year, "systdown", wp, lepton_eta, lepton_pt) + else: + cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"] + + values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf") + values["up"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systup") + values["down"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systdown") + + for key, value in values.items(): + # efficiency for a single lepton passing is 1 - (1 - eff1) * (1 - eff2) * ... + val = 1 - np.prod(1 - value, axis=1) + values[key] = np.nan_to_num(val, nan=1) + + # add weights (for now only the nominal weight) + weights.add(f"{lepton_type}_id_{wp}", values["nominal"], values["up"], values["down"]) + + TOP_PDGID = 6 GEN_FLAGS = ["fromHardProcess", "isLastCopy"] From eb78c58d308efc68de0612eaf92a18b14c373d6d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:49:48 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/HHbbVV/postprocessing/InferenceAnalysis.ipynb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb index 319a1d6d..16d7069a 100644 --- a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb +++ b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb @@ -108,9 +108,9 @@ "]\n", "\n", "for mX, mY in res_mps:\n", - " res_samples[\n", - " f\"X[{mX}]->H(bb)Y[{mY}](WW)\"\n", - " ] = f\"NMSSM_XToYH_MX{mX}_MY{mY}_HTo2bYTo2W_hadronicDecay\"\n", + " res_samples[f\"X[{mX}]->H(bb)Y[{mY}](WW)\"] = (\n", + " f\"NMSSM_XToYH_MX{mX}_MY{mY}_HTo2bYTo2W_hadronicDecay\"\n", + " )\n", "\n", "res_sig_keys = list(res_samples.keys())\n", "nonres_sig_keys = [\"HHbbVV\"]" @@ -310,6 +310,7 @@ " sample2...\n", "}\n", "\"\"\"\n", + "\n", "pt_key = \"Pt\"\n", "msd_key = \"Msd\"\n", "var_prefix = \"ak8FatJet\"\n", From 667c598ef13f37d1e8d275bd61636a81bd531470 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Fri, 23 Feb 2024 11:41:45 -0600 Subject: [PATCH 3/3] bug fixes --- src/HHbbVV/bash/run_local.sh | 22 ++++----- src/HHbbVV/processors/bbVVSkimmer.py | 71 +++++++++++++++++++--------- src/HHbbVV/processors/corrections.py | 33 +++++++++---- 3 files changed, 83 insertions(+), 43 deletions(-) diff --git a/src/HHbbVV/bash/run_local.sh b/src/HHbbVV/bash/run_local.sh index c3e3ac42..d10bb200 100755 --- a/src/HHbbVV/bash/run_local.sh +++ b/src/HHbbVV/bash/run_local.sh @@ -4,7 +4,11 @@ mkdir -p tmp/test_outputs/ year=2017 extraargs="" -# extraargs="--no-inference" +extraargs="--no-inference" + +python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs +mv "0-1.parquet" tmp/test_outputs/ttsl.parquet +mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl python -W ignore src/run.py --processor skimmer --year $year --samples HH --subsamples GluGluToHHTobbVV_node_cHHH1 --save-systematics --starti 0 --endi 1 $extraargs mv "0-1.parquet" tmp/test_outputs/hhbbvv.parquet @@ -14,14 +18,10 @@ python -W ignore src/run.py --processor skimmer --year $year --samples XHY --sub mv "0-1.parquet" tmp/test_outputs/xhy.parquet mv "outfiles/0-1.pkl" tmp/test_outputs/xhy.pkl -# python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs -# mv "0-1.parquet" tmp/test_outputs/ttsl.parquet -# mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl - -# python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs -# mv "0-1.parquet" tmp/test_outputs/qcd.parquet -# mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl +python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs +mv "0-1.parquet" tmp/test_outputs/qcd.parquet +mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl -# python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs -# mv "0-1.parquet" tmp/test_outputs/data.parquet -# mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl \ No newline at end of file +python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs +mv "0-1.parquet" tmp/test_outputs/data.parquet +mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl \ No newline at end of file diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py index f7df3297..d51aebc3 100644 --- a/src/HHbbVV/processors/bbVVSkimmer.py +++ b/src/HHbbVV/processors/bbVVSkimmer.py @@ -545,20 +545,19 @@ def process(self, events: ak.Array): # https://indico.cern.ch/event/1154430/#b-471403-higgs-meeting-special goodelectronHbb = ( - (events.Electron.pt > 20) - & (abs(events.Electron.eta) < 2.5) - & (events.Electron.miniPFRelIso_all < 0.4) - & (events.Electron.cutBased >= events.Electron.LOOSE) + (electrons.pt > 20) + & (abs(electrons.eta) < 2.5) + & (electrons.miniPFRelIso_all < 0.4) + & (electrons.cutBased >= electrons.LOOSE) ) nelectronsHbb = ak.sum(goodelectronHbb, axis=1) + goodelectronsHbb = electrons[goodelectronHbb] goodmuonHbb = ( - (events.Muon.pt > 10) - & (abs(events.Muon.eta) < 2.4) - & (events.Muon.pfRelIso04_all < 0.25) - & events.Muon.looseId + (muons.pt > 10) & (abs(muons.eta) < 2.4) & (muons.pfRelIso04_all < 0.25) & muons.looseId ) nmuonsHbb = ak.sum(goodmuonHbb, axis=1) + goodmuonsHbb = muons[goodmuonHbb] # HH4b lepton vetoes: # https://cms.cern.ch/iCMS/user/noteinfo?cmsnoteid=CMS%20AN-2020/231 Section 7.1.2 @@ -567,20 +566,22 @@ def process(self, events: ak.Array): # (miniPFRelIso all < 0.4). An electron is required to pass mvaFall17V2noIso WP90 identification as well as mini-isolation (miniPFRelIso all < 0.4). goodelectronHH = ( - (events.Electron.pt > 20) - & (abs(events.Electron.eta) < 2.5) - & (events.Electron.miniPFRelIso_all < 0.4) - & (events.Electron.mvaFall17V2noIso_WP90) + (electrons.pt > 20) + & (abs(electrons.eta) < 2.5) + & (electrons.miniPFRelIso_all < 0.4) + & (electrons.mvaFall17V2noIso_WP90) ) nelectronsHH = ak.sum(goodelectronHH, axis=1) + goodelectronsHH = electrons[goodelectronHH] goodmuonHH = ( - (events.Muon.pt > 15) - & (abs(events.Muon.eta) < 2.4) - & (events.Muon.miniPFRelIso_all < 0.4) - & events.Muon.looseId + (muons.pt > 15) + & (abs(muons.eta) < 2.4) + & (muons.miniPFRelIso_all < 0.4) + & muons.looseId ) nmuonsHH = ak.sum(goodmuonHH, axis=1) + goodmuonsHH = muons[goodmuonHH] skimmed_events["nGoodElectronsHbb"] = nelectronsHbb.to_numpy() skimmed_events["nGoodElectronsHH"] = nelectronsHH.to_numpy() @@ -698,15 +699,39 @@ def process(self, events: ak.Array): # to check in postprocessing for xsec & lumi normalisation skimmed_events["weight_noxsec"] = weight - # separate lepton ID weights for now TODO: incorporate above if lepton vetoes are useful + # separate lepton ID weights for now TODO: incorporate above next time if lepton vetoes are useful lepton_weights = Weights(len(events), storeIndividual=True) - add_lepton_id_weights(lepton_weights, year, goodelectronHbb, "electron", "Loose") - add_lepton_id_weights(lepton_weights, year, goodelectronHH, "electron", "wp90noiso") - add_lepton_id_weights(lepton_weights, year, goodmuon, "muon", "Loose") + add_lepton_id_weights( + lepton_weights, year, goodelectronsHbb, "electron", "Loose", label="_hbb" + ) + add_lepton_id_weights( + lepton_weights, year, goodelectronsHH, "electron", "wp90noiso", label="_hh" + ) + add_lepton_id_weights(lepton_weights, year, goodmuonsHbb, "muon", "Loose", label="_hbb") + add_lepton_id_weights(lepton_weights, year, goodmuonsHH, "muon", "Loose", label="_hh") + print("lepton weights") + print(lepton_weights) + + lepton_weights_dict = { + f"single_weight_{key}": val + for key, val in list(lepton_weights._weights.items()) + + list(lepton_weights._modifiers.items()) + } + + print(lepton_weights_dict) + skimmed_events = {**skimmed_events, **lepton_weights_dict} + + # for systematic in lepton_weights._weights: + # var_name = f"single_weight_{systematic}" + # skimmed_events[var_name] = lepton_weights.partial_weight([var_name]) + # print(var_name) + # print(skimmed_events[var_name]) - for systematic in lepton_weights.variations: - var_name = f"single_weight_{systematic}" - skimmed_events[var_name] = lepton_weights.partial_weight([var_name]) + # for systematic in lepton_weights._modifiers: + # var_name = f"single_weight_{systematic}" + # skimmed_events[var_name] = lepton_weights._modifiers[var_name] + # print(var_name) + # print(skimmed_events[var_name]) # reshape and apply selections sel_all = selection.all(*selection.names) diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py index 667b553a..c9b0f56e 100644 --- a/src/HHbbVV/processors/corrections.py +++ b/src/HHbbVV/processors/corrections.py @@ -29,7 +29,7 @@ import pathlib from . import utils -from .utils import P4, pad_val +from .utils import P4, pad_val, PAD_VAL package_path = str(pathlib.Path(__file__).parent.parent.resolve()) @@ -41,7 +41,7 @@ """ pog_correction_path = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/" pog_jsons = { - "muon": ["MUO", "muon_Z.json.gz"], + "muon": ["MUO", "muon_Z_v2.json.gz"], "electron": ["EGM", "electron.json.gz"], "pileup": ["LUM", "puWeights.json.gz"], "btagging": ["BTV", "btagging.json.gz"], @@ -458,14 +458,22 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_ty # For analysis region def add_lepton_id_weights( - weights: Weights, year: str, lepton: NanoEventsArray, lepton_type: str, wp: str + weights: Weights, + year: str, + lepton: NanoEventsArray, + lepton_type: str, + wp: str, + label: str = "", + max_num_leptons: int = 3, ): + year = get_vfp_year(year) ul_year = get_UL_year(year) cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year)) - lep_pt = np.array(ak.fill_none(lepton.pt, 0.0)) - lep_eta = np.abs(np.array(ak.fill_none(lepton.eta, 0.0))) + lep_exists = ak.count(lepton.pt, axis=1) > 0 + lep_pt = pad_val(lepton.pt, max_num_leptons, axis=1) + lep_eta = pad_val(lepton.eta, max_num_leptons, axis=1) # some voodoo from cristina lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type) @@ -475,10 +483,11 @@ def add_lepton_id_weights( # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html cset_map = cset["UL-Electron-ID-SF"] - values["nominal"] = cset_map.evaluate(ul_year, "sf", wp, lepton_eta, lepton_pt) - values["up"] = cset_map.evaluate(ul_year, "systup", wp, lepton_eta, lepton_pt) - values["down"] = cset_map.evaluate(ul_year, "systdown", wp, lepton_eta, lepton_pt) + values["nominal"] = cset_map.evaluate(year, "sf", wp, lepton_eta, lepton_pt) + values["up"] = cset_map.evaluate(year, "sfup", wp, lepton_eta, lepton_pt) + values["down"] = cset_map.evaluate(year, "sfdown", wp, lepton_eta, lepton_pt) else: + # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/MUO_2018_UL_muon_Z_v2.html cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"] values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf") @@ -487,11 +496,17 @@ def add_lepton_id_weights( for key, value in values.items(): # efficiency for a single lepton passing is 1 - (1 - eff1) * (1 - eff2) * ... + value[lepton_pt == PAD_VAL] = 0 # if lep didn't exist, ignore efficiency val = 1 - np.prod(1 - value, axis=1) + val[~lep_exists] = 1 # if no leps in event, SF = 1 values[key] = np.nan_to_num(val, nan=1) + print(lepton_type, label) + print(values) + # add weights (for now only the nominal weight) - weights.add(f"{lepton_type}_id_{wp}", values["nominal"], values["up"], values["down"]) + weights.add(f"{lepton_type}{label}_id_{wp}", values["nominal"], values["up"], values["down"]) + # breakpoint() TOP_PDGID = 6