Lepton ID weights #45

Merged 4 commits on Feb 23, 2024
22 changes: 11 additions & 11 deletions src/HHbbVV/bash/run_local.sh
@@ -4,7 +4,11 @@ mkdir -p tmp/test_outputs/

year=2017
extraargs=""
# extraargs="--no-inference"
extraargs="--no-inference"

python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl

python -W ignore src/run.py --processor skimmer --year $year --samples HH --subsamples GluGluToHHTobbVV_node_cHHH1 --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/hhbbvv.parquet
@@ -14,14 +18,10 @@ python -W ignore src/run.py --processor skimmer --year $year --samples XHY --sub
mv "0-1.parquet" tmp/test_outputs/xhy.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/xhy.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/ttsl.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/qcd.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl
python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/qcd.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl

# python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
# mv "0-1.parquet" tmp/test_outputs/data.parquet
# mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs
mv "0-1.parquet" tmp/test_outputs/data.parquet
mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl
68 changes: 52 additions & 16 deletions src/HHbbVV/processors/bbVVSkimmer.py
@@ -35,6 +35,7 @@
get_jec_jets,
get_jmsr,
get_lund_SFs,
add_lepton_id_weights,
)
from .common import LUMI, HLTs, btagWPs, jec_shifts, jmsr_shifts
from . import common
@@ -544,20 +545,19 @@ def process(self, events: ak.Array):
# https://indico.cern.ch/event/1154430/#b-471403-higgs-meeting-special

goodelectronHbb = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.cutBased >= events.Electron.LOOSE)
(electrons.pt > 20)
& (abs(electrons.eta) < 2.5)
& (electrons.miniPFRelIso_all < 0.4)
& (electrons.cutBased >= electrons.LOOSE)
)
nelectronsHbb = ak.sum(goodelectronHbb, axis=1)
goodelectronsHbb = electrons[goodelectronHbb]

goodmuonHbb = (
(events.Muon.pt > 10)
& (abs(events.Muon.eta) < 2.4)
& (events.Muon.pfRelIso04_all < 0.25)
& events.Muon.looseId
(muons.pt > 10) & (abs(muons.eta) < 2.4) & (muons.pfRelIso04_all < 0.25) & muons.looseId
)
nmuonsHbb = ak.sum(goodmuonHbb, axis=1)
goodmuonsHbb = muons[goodmuonHbb]

# HH4b lepton vetoes:
# https://cms.cern.ch/iCMS/user/noteinfo?cmsnoteid=CMS%20AN-2020/231 Section 7.1.2
@@ -566,20 +566,22 @@ def process(self, events: ak.Array):
# (miniPFRelIso all < 0.4). An electron is required to pass mvaFall17V2noIso WP90 identification as well as mini-isolation (miniPFRelIso all < 0.4).

goodelectronHH = (
(events.Electron.pt > 20)
& (abs(events.Electron.eta) < 2.5)
& (events.Electron.miniPFRelIso_all < 0.4)
& (events.Electron.mvaFall17V2noIso_WP90)
(electrons.pt > 20)
& (abs(electrons.eta) < 2.5)
& (electrons.miniPFRelIso_all < 0.4)
& (electrons.mvaFall17V2noIso_WP90)
)
nelectronsHH = ak.sum(goodelectronHH, axis=1)
goodelectronsHH = electrons[goodelectronHH]

goodmuonHH = (
(events.Muon.pt > 15)
& (abs(events.Muon.eta) < 2.4)
& (events.Muon.miniPFRelIso_all < 0.4)
& events.Muon.looseId
(muons.pt > 15)
& (abs(muons.eta) < 2.4)
& (muons.miniPFRelIso_all < 0.4)
& muons.looseId
)
nmuonsHH = ak.sum(goodmuonHH, axis=1)
goodmuonsHH = muons[goodmuonHH]

skimmed_events["nGoodElectronsHbb"] = nelectronsHbb.to_numpy()
skimmed_events["nGoodElectronsHH"] = nelectronsHH.to_numpy()
@@ -697,6 +699,40 @@ def process(self, events: ak.Array):
# to check in postprocessing for xsec & lumi normalisation
skimmed_events["weight_noxsec"] = weight

# separate lepton ID weights for now; TODO: incorporate into the main event weights above if the lepton vetoes turn out to be useful
lepton_weights = Weights(len(events), storeIndividual=True)
add_lepton_id_weights(
lepton_weights, year, goodelectronsHbb, "electron", "Loose", label="_hbb"
)
add_lepton_id_weights(
lepton_weights, year, goodelectronsHH, "electron", "wp90noiso", label="_hh"
)
add_lepton_id_weights(lepton_weights, year, goodmuonsHbb, "muon", "Loose", label="_hbb")
add_lepton_id_weights(lepton_weights, year, goodmuonsHH, "muon", "Loose", label="_hh")
print("lepton weights")
print(lepton_weights)

lepton_weights_dict = {
f"single_weight_{key}": val
for key, val in list(lepton_weights._weights.items())
+ list(lepton_weights._modifiers.items())
}

print(lepton_weights_dict)
skimmed_events = {**skimmed_events, **lepton_weights_dict}

# for systematic in lepton_weights._weights:
# var_name = f"single_weight_{systematic}"
# skimmed_events[var_name] = lepton_weights.partial_weight([var_name])
# print(var_name)
# print(skimmed_events[var_name])

# for systematic in lepton_weights._modifiers:
# var_name = f"single_weight_{systematic}"
# skimmed_events[var_name] = lepton_weights._modifiers[var_name]
# print(var_name)
# print(skimmed_events[var_name])

# reshape and apply selections
sel_all = selection.all(*selection.names)

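For reference, a minimal standalone sketch of the columns the dict comprehension above produces, assuming coffea's `Weights` with `storeIndividual=True` keeps the nominal arrays in `_weights` and the Up/Down variations in `_modifiers` (keyed as `<name>Up` / `<name>Down`); the toy scale-factor values are illustrative only, not taken from this PR:

```python
import numpy as np
from coffea.analysis_tools import Weights

n_events = 5
ones = np.ones(n_events)

# mirror the four add_lepton_id_weights calls in the skimmer:
# weight names follow f"{lepton_type}{label}_id_{wp}", e.g. "electron_hbb_id_Loose"
lepton_weights = Weights(n_events, storeIndividual=True)
lepton_weights.add("electron_hbb_id_Loose", ones, ones * 1.02, ones * 0.98)
lepton_weights.add("electron_hh_id_wp90noiso", ones, ones * 1.03, ones * 0.97)
lepton_weights.add("muon_hbb_id_Loose", ones, ones * 1.01, ones * 0.99)
lepton_weights.add("muon_hh_id_Loose", ones, ones * 1.01, ones * 0.99)

lepton_weights_dict = {
    f"single_weight_{key}": val
    for key, val in list(lepton_weights._weights.items())
    + list(lepton_weights._modifiers.items())
}

# expected keys: single_weight_electron_hbb_id_Loose, ..., plus the Up/Down
# modifiers such as single_weight_electron_hbb_id_LooseUp
print(sorted(lepton_weights_dict))
```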
58 changes: 56 additions & 2 deletions src/HHbbVV/processors/corrections.py
@@ -29,7 +29,7 @@
import pathlib

from . import utils
from .utils import P4, pad_val
from .utils import P4, pad_val, PAD_VAL


package_path = str(pathlib.Path(__file__).parent.parent.resolve())
@@ -41,7 +41,7 @@
"""
pog_correction_path = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/"
pog_jsons = {
"muon": ["MUO", "muon_Z.json.gz"],
"muon": ["MUO", "muon_Z_v2.json.gz"],
"electron": ["EGM", "electron.json.gz"],
"pileup": ["LUM", "puWeights.json.gz"],
"btagging": ["BTV", "btagging.json.gz"],
@@ -432,6 +432,7 @@ def _get_lepton_clipped(lep_pt, lep_eta, lepton_type, corr=None):
return lepton_pt, lepton_eta


# Used only for validation region right now
def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
ul_year = get_UL_year(year)

@@ -455,6 +456,59 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
weights.add(f"{lepton_type}_{corr}", values["nominal"], values["up"], values["down"])


# For analysis region
def add_lepton_id_weights(
weights: Weights,
year: str,
lepton: NanoEventsArray,
lepton_type: str,
wp: str,
label: str = "",
max_num_leptons: int = 3,
):
year = get_vfp_year(year)
ul_year = get_UL_year(year)

cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year))

lep_exists = ak.count(lepton.pt, axis=1) > 0
lep_pt = pad_val(lepton.pt, max_num_leptons, axis=1)
lep_eta = pad_val(lepton.eta, max_num_leptons, axis=1)

# clip pt and eta into the ranges covered by the corrections (helper from Cristina)
lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type)
values = {}

if lepton_type == "electron":
# https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/EGM_2018_UL_electron.html
cset_map = cset["UL-Electron-ID-SF"]

values["nominal"] = cset_map.evaluate(year, "sf", wp, lepton_eta, lepton_pt)
values["up"] = cset_map.evaluate(year, "sfup", wp, lepton_eta, lepton_pt)
values["down"] = cset_map.evaluate(year, "sfdown", wp, lepton_eta, lepton_pt)
else:
# https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/MUO_2018_UL_muon_Z_v2.html
cset_map = cset[f"NUM_{wp}ID_DEN_TrackerMuons"]

values["nominal"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "sf")
values["up"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systup")
values["down"] = cset_map.evaluate(ul_year, lepton_eta, lepton_pt, "systdown")

for key, value in values.items():
# efficiency for at least one lepton passing is 1 - (1 - eff1) * (1 - eff2) * ...
value[lepton_pt == PAD_VAL] = 0 # if lep didn't exist, ignore efficiency
val = 1 - np.prod(1 - value, axis=1)
val[~lep_exists] = 1 # if no leps in event, SF = 1
values[key] = np.nan_to_num(val, nan=1)

print(lepton_type, label)
print(values)

# add weights (for now only the nominal weight)
weights.add(f"{lepton_type}{label}_id_{wp}", values["nominal"], values["up"], values["down"])
# breakpoint()


TOP_PDGID = 6
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]
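To make the padding and combination logic in add_lepton_id_weights easier to check, here is a small numpy-only sketch with toy per-lepton scale factors; the PAD_VAL value below is assumed for illustration (in the analysis it is imported from processors.utils), and only its role as a padding sentinel matters:

```python
import numpy as np

PAD_VAL = -99999  # assumed placeholder standing in for the sentinel from .utils

# toy per-lepton SFs for three events, padded to max_num_leptons = 3
sfs = np.array([
    [0.95, 0.90, PAD_VAL],        # two reconstructed leptons
    [0.98, PAD_VAL, PAD_VAL],     # one lepton
    [PAD_VAL, PAD_VAL, PAD_VAL],  # no leptons
], dtype=float)
lep_pt = sfs.copy()                       # stand-in for the padded lepton pt used as the mask
lep_exists = np.array([True, True, False])

value = sfs.copy()
value[lep_pt == PAD_VAL] = 0              # padded slots contribute a factor of (1 - 0) = 1
val = 1 - np.prod(1 - value, axis=1)      # "at least one lepton passes" combination
val[~lep_exists] = 1                      # events with no leptons get SF = 1
val = np.nan_to_num(val, nan=1)

print(val)  # -> [0.995 0.98  1.  ]
```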
